From 983e5b6e890accd7fe9153c661314a4c49cef8d9 Mon Sep 17 00:00:00 2001 From: reginaib Date: Thu, 31 Oct 2024 13:05:57 +0100 Subject: [PATCH 01/68] periodic table refactored. --- chython/periodictable/__init__.py | 14 +- .../{element => base}/__init__.py | 9 +- .../{element => base}/dynamic.py | 56 +- .../{element => base}/element.py | 152 ++++-- chython/periodictable/{ => base}/groups.py | 2 +- chython/periodictable/{ => base}/periods.py | 2 +- chython/periodictable/base/query.py | 479 ++++++++++++++++++ chython/periodictable/element/core.py | 118 ----- chython/periodictable/element/query.py | 318 ------------ chython/periodictable/groupI.py | 8 +- chython/periodictable/groupII.py | 8 +- chython/periodictable/groupIII.py | 8 +- chython/periodictable/groupIV.py | 8 +- chython/periodictable/groupIX.py | 8 +- chython/periodictable/groupV.py | 8 +- chython/periodictable/groupVI.py | 8 +- chython/periodictable/groupVII.py | 8 +- chython/periodictable/groupVIII.py | 8 +- chython/periodictable/groupX.py | 6 +- chython/periodictable/groupXI.py | 6 +- chython/periodictable/groupXII.py | 8 +- chython/periodictable/groupXIII.py | 8 +- chython/periodictable/groupXIV.py | 6 +- chython/periodictable/groupXV.py | 6 +- chython/periodictable/groupXVI.py | 6 +- chython/periodictable/groupXVII.py | 6 +- chython/periodictable/groupXVIII.py | 8 +- 27 files changed, 700 insertions(+), 582 deletions(-) rename chython/periodictable/{element => base}/__init__.py (71%) rename chython/periodictable/{element => base}/dynamic.py (73%) rename chython/periodictable/{element => base}/element.py (79%) rename chython/periodictable/{ => base}/groups.py (95%) rename chython/periodictable/{ => base}/periods.py (93%) create mode 100644 chython/periodictable/base/query.py delete mode 100644 chython/periodictable/element/core.py delete mode 100644 chython/periodictable/element/query.py diff --git a/chython/periodictable/__init__.py b/chython/periodictable/__init__.py index 304f6e44..5f272d31 100644 --- 
a/chython/periodictable/__init__.py +++ b/chython/periodictable/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2021 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,9 +17,9 @@ # along with this program; if not, see . # from abc import ABCMeta -from .element import * -from .groups import * -from .periods import * +from .base import * +from .base.groups import * +from .base.periods import * from .groupI import * from .groupII import * from .groupIII import * @@ -51,9 +51,9 @@ for _class in (DynamicElement, QueryElement): for k, v in elements.items(): name = f'{_class.__name__[:-7]}{k}' - globals()[name] = cls = type(name, (_class, *v.__mro__[-3:-1]), - {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number, - 'atomic_radius': v.atomic_radius}) + globals()[name] = cls = type(name, + (_class, *v.__mro__[-3:-1]), + {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number}) setattr(modules[v.__module__], name, cls) modules[v.__module__].__all__.append(name) __all__.append(name) diff --git a/chython/periodictable/element/__init__.py b/chython/periodictable/base/__init__.py similarity index 71% rename from chython/periodictable/element/__init__.py rename to chython/periodictable/base/__init__.py index 1fecc8f4..f63b3bb6 100644 --- a/chython/periodictable/element/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov -# Copyright 2019 Tagir Akhmetshin -# Copyright 2019 Dayana Bashirova +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,10 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .core import * +from .dynamic import * from .element import * from .query import * -from .dynamic import * -__all__ = ['Core', 'Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/element/dynamic.py b/chython/periodictable/base/dynamic.py similarity index 73% rename from chython/periodictable/element/dynamic.py rename to chython/periodictable/base/dynamic.py index 70aaaabd..d0989547 100644 --- a/chython/periodictable/element/dynamic.py +++ b/chython/periodictable/base/dynamic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,20 +16,32 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from abc import ABC -from typing import Type, Union -from .core import Core +from abc import ABC, abstractmethod +from typing import Type, Union, Optional from .element import Element -from ...exceptions import IsNotConnectedAtom -class DynamicElement(Core, ABC): - __slots__ = ('__p_charge', '__p_is_radical') +class DynamicElement(ABC): + __slots__ = ('_charge', '_is_radical', '_p_charge', '_p_is_radical', '_isotope') + + def __init__(self, isotope: Optional[int]): + self._isotope = isotope + + @property + def isotope(self): + return self._isotope @property def atomic_symbol(self) -> str: return self.__class__.__name__[7:] + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + @classmethod def from_symbol(cls, symbol: str) -> Type['DynamicElement']: """ @@ -63,19 +75,21 @@ def from_atom(cls, atom: Union['Element', 'DynamicElement']) -> 'DynamicElement' raise TypeError('Element or DynamicElement expected') return atom.copy() + @property + def charge(self) -> int: + return self._charge + + @property + def is_radical(self) -> bool: + return self._is_radical + @property def p_charge(self) -> int: - try: - return self._graph()._p_charges[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._p_charge @property def p_is_radical(self) -> bool: - try: - return self._graph()._p_radicals[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._p_is_radical def __eq__(self, other): """ @@ -96,5 +110,17 @@ def is_dynamic(self) -> bool: """ return self.charge != self.p_charge or self.is_radical != self.p_is_radical + def copy(self): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._p_is_radical = self.p_is_radical + copy._p_charge = self.p_charge + return copy + + def __copy__(self): + return self.copy() + __all__ = ['DynamicElement'] diff --git a/chython/periodictable/element/element.py 
b/chython/periodictable/base/element.py similarity index 79% rename from chython/periodictable/element/element.py rename to chython/periodictable/base/element.py index 22a28386..c3703336 100644 --- a/chython/periodictable/element/element.py +++ b/chython/periodictable/base/element.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -20,12 +20,11 @@ from CachedMethods import class_cached_property from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Type -from .core import Core from ...exceptions import IsNotConnectedAtom, ValenceError -class Element(Core, ABC): - __slots__ = () +class Element(ABC): + __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens') __class_cache__ = {} def __init__(self, isotope: Optional[int] = None): @@ -39,12 +38,35 @@ def __init__(self, isotope: Optional[int] = None): raise ValueError(f'isotope number {isotope} impossible or not stable for {self.atomic_symbol}') elif isotope is not None: raise TypeError('integer isotope number required') - super().__init__(isotope) + self._isotope = isotope + self._charge = 0 + self._is_radical = False + self._x = self._y = 0 + self._implicit_hydrogens = None + + def __repr__(self): + if self._isotope: + return f'{self.__class__.__name__}({self._isotope})' + return f'{self.__class__.__name__}()' @property def atomic_symbol(self) -> str: return self.__class__.__name__ + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + + @property + def isotope(self) -> Optional[int]: + """ + Isotope number + """ + return self._isotope + @property def atomic_mass(self) -> float: mass = self.isotopes_masses @@ -73,72 +95,103 @@ def atomic_radius(self) -> float: Valence radius of atom """ - @Core.charge.setter - def charge(self, charge: int): - if 
not isinstance(charge, int): + @property + def charge(self) -> int: + """ + Charge of atom + """ + return self._charge + + @charge.setter + def charge(self, value: int): + """ + Update charge of atom. Make sure to flush cache and recalculate hydrogens count and stereo. + Or use context manager on molecule: + + with mol: + mol.atom(1).charge = 1 + """ + if not isinstance(value, int): raise TypeError('formal charge should be int in range [-4, 4]') - elif charge > 4 or charge < -4: + elif value > 4 or value < -4: raise ValueError('formal charge should be in range [-4, 4]') - try: - g = self._graph() - g._charges[self._n] = charge - except AttributeError: - raise IsNotConnectedAtom - else: - g._calc_implicit(self._n) - g.flush_cache() - g.fix_stereo() + self._charge = value + + @property + def is_radical(self) -> bool: + """ + Radical state of atoms + """ + return self._is_radical + + @is_radical.setter + def is_radical(self, value: bool): + """ + Update radical state of atom. Make sure to flush cache and recalculate hydrogens count and stereo. 
+ Or use context manager on molecule: - @Core.is_radical.setter - def is_radical(self, is_radical: bool): - if not isinstance(is_radical, bool): + with mol: + mol.atom(1).is_radical = True + """ + if not isinstance(value, bool): raise TypeError('bool expected') - try: - g = self._graph() - g._radicals[self._n] = is_radical - except AttributeError: - raise IsNotConnectedAtom - else: - g._calc_implicit(self._n) - g.flush_cache() - g.fix_stereo() + self._is_radical = value @property def x(self) -> float: """ X coordinate of atom on 2D plane """ - try: - return self._graph()._plane[self._n][0] - except AttributeError: - raise IsNotConnectedAtom + return self._x + + @x.setter + def x(self, value: float): + if not isinstance(value, float): + raise TypeError('float expected') + self._x = value @property def y(self) -> float: """ Y coordinate of atom on 2D plane """ - try: - return self._graph()._plane[self._n][1] - except AttributeError: - raise IsNotConnectedAtom + return self._y + + @y.setter + def y(self, value: float): + if not isinstance(value, float): + raise TypeError('float expected') + self._y = value @property def xy(self) -> Tuple[float, float]: """ (X, Y) coordinates of atom on 2D plane """ - try: - return self._graph()._plane[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._x, self._y + + @xy.setter + def xy(self, value: Tuple[float, float]): + if (not isinstance(value, (tuple, list)) + or len(value) != 2 + or not isinstance(value[0], float) + or not isinstance(value[1], float)): + raise TypeError('tuple of 2 floats expected') + self._x, self._y = value @property def implicit_hydrogens(self) -> Optional[int]: - try: - return self._graph()._hydrogens[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._implicit_hydrogens + + def copy(self): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + return copy + + def 
__copy__(self): + return self.copy() @property def explicit_hydrogens(self) -> int: @@ -149,10 +202,9 @@ def explicit_hydrogens(self) -> int: @property def total_hydrogens(self) -> int: - try: - return self._graph().total_hydrogens(self._n) - except AttributeError: - raise IsNotConnectedAtom + if self._implicit_hydrogens is None: + raise ValenceError + return self._implicit_hydrogens + self.explicit_hydrogens @property def heteroatoms(self) -> int: diff --git a/chython/periodictable/groups.py b/chython/periodictable/base/groups.py similarity index 95% rename from chython/periodictable/groups.py rename to chython/periodictable/base/groups.py index 912c9ae3..75809c61 100644 --- a/chython/periodictable/groups.py +++ b/chython/periodictable/base/groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/periods.py b/chython/periodictable/base/periods.py similarity index 93% rename from chython/periodictable/periods.py rename to chython/periodictable/base/periods.py index 2f3e6cba..f05e6d08 100644 --- a/chython/periodictable/periods.py +++ b/chython/periodictable/base/periods.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py new file mode 100644 index 00000000..5ae7adb5 --- /dev/null +++ b/chython/periodictable/base/query.py @@ -0,0 +1,479 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2021 Dmitrij Zanadvornykh +# This file is part of chython. 
+# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from abc import ABC, abstractmethod +from functools import cached_property +from typing import Tuple, Type, List, Union, Optional +from .element import Element + + +_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', + 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} + + +def _validate(value, prop): + if value is None: + return () + elif isinstance(value, int): + if value < 0 or value > 14: + raise ValueError(f'{prop} should be in range [0, 14]') + return (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(x, int) for x in value): + raise TypeError(f'{prop} should be list or tuple of ints') + if any(x < 0 or x > 14 for x in value): + raise ValueError(f'{prop} should be in range [0, 14]') + if len(set(value)) != len(value): + raise ValueError(f'{prop} should be unique') + return tuple(sorted(value)) + else: + raise TypeError(f'{prop} should be int or list or tuple of ints') + + +class Query(ABC): + __slots__ = ('_neighbors', '_hybridization', '_masked') + + def __init__(self): + self._neighbors = () + self._hybridization = () + self._masked = False + + @property + def neighbors(self) -> Tuple[int, ...]: + return self._neighbors + + @neighbors.setter + def neighbors(self, value): + self._neighbors = _validate(value, 'neighbors') + + @property + def hybridization(self) -> 
Tuple[int, ...]: + return self._hybridization + + @hybridization.setter + def hybridization(self, value): + if value is None: + self._hybridization = () + elif isinstance(value, int): + if value < 1 or value > 4: + raise ValueError('hybridization should be in range [1, 4]') + self._hybridization = (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(h, int) for h in value): + raise TypeError('hybridizations should be list or tuple of ints') + if any(h < 1 or h > 4 for h in value): + raise ValueError('hybridizations should be in range [1, 4]') + if len(set(value)) != len(value): + raise ValueError('hybridizations should be unique') + self._hybridization = tuple(sorted(value)) + else: + raise TypeError('hybridization should be int or list or tuple of ints') + + @property + def masked(self): + return self._masked + + @masked.setter + def masked(self, value): + if not isinstance(value, bool): + raise TypeError('masked should be bool') + self._masked = value + + def copy(self): + copy = object.__new__(self.__class__) + copy._neighbors = self.neighbors + copy._hybridization = self.hybridization + copy._masked = self.masked + return copy + + def __copy__(self): + return self.copy() + + def __repr__(self): + return f'{self.__class__.__name__}()' + + +class ExtendedQuery(Query, ABC): + __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens') + + def __init__(self): + super().__init__() + self._charge = 0 + self._is_radical = False + self._heteroatoms = () + self._ring_sizes = () + self._implicit_hydrogens = () + + @property + def charge(self) -> int: + """ + Charge of atom + """ + return self._charge + + @charge.setter + def charge(self, value: int): + if not isinstance(value, int): + raise TypeError('formal charge should be int in range [-4, 4]') + elif value > 4 or value < -4: + raise ValueError('formal charge should be in range [-4, 4]') + self._charge = value + + @property + def is_radical(self) -> bool: + """ + 
Radical state of atoms + """ + return self._is_radical + + @is_radical.setter + def is_radical(self, value: bool): + if not isinstance(value, bool): + raise TypeError('bool expected') + self._is_radical = value + + @property + def heteroatoms(self) -> Tuple[int, ...]: + return self._heteroatoms + + @heteroatoms.setter + def heteroatoms(self, value): + self._heteroatoms = _validate(value, 'heteroatoms') + + @property + def implicit_hydrogens(self) -> Tuple[int, ...]: + return self._implicit_hydrogens + + @implicit_hydrogens.setter + def implicit_hydrogens(self, value): + self._implicit_hydrogens = _validate(value, 'implicit hydrogens') + + @property + def ring_sizes(self) -> Tuple[int, ...]: + """ + Atom rings sizes. + """ + return self._ring_sizes + + @ring_sizes.setter + def ring_sizes(self, value): + if value is None: + self._ring_sizes = () + elif isinstance(value, int): + if value < 3 and value != 0: + raise ValueError('rings should be greater or equal 3. ring equal to zero is no ring atom mark') + self._ring_sizes = (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(x, int) for x in value): + raise TypeError('rings should be list or tuple of ints') + if any(x < 3 for x in value): + raise ValueError('rings should be greater or equal 3') + if len(set(value)) != len(value): + raise ValueError('rings should be unique') + self._ring_sizes = tuple(sorted(value)) + else: + raise TypeError('rings should be int or list or tuple of ints') + + def copy(self): + copy = super().copy() + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._heteroatoms = self.heteroatoms + copy._implicit_hydrogens = self.implicit_hydrogens + copy._ring_sizes = self.ring_sizes + return copy + + +class AnyMetal(Query): + """ + Charge and radical ignored any metal. Rings, hydrogens and heteroatoms count also ignored. + + Class designed for d-elements matching in standardization. 
+ """ + __slots__ = () + + @property + def atomic_symbol(self) -> str: + return 'M' + + def __eq__(self, other): + if isinstance(other, Element): + if other.atomic_symbol in _inorganic: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + return True + # metal is subset of metal. only + return (isinstance(other, AnyMetal) + and self.neighbors == other.neighbors + and self.hybridization == other.hybridization) + + def __hash__(self): + return hash((self.neighbors, self.hybridization)) + + +class AnyElement(ExtendedQuery): + __slots__ = () + + @property + def atomic_symbol(self) -> str: + return 'A' + + def __eq__(self, other): + """ + Compare attached to molecules elements and query elements + """ + if isinstance(other, Element): + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if set(self.ring_sizes).isdisjoint(other.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + # any is subset of any. 
only + return (isinstance(other, AnyElement) + and self.charge == other.charge + and self.is_radical == other.is_radical + and self.neighbors == other.neighbors + and self.hybridization == other.hybridization + and self.ring_sizes == other.ring_sizes + and self.implicit_hydrogens == other.implicit_hydrogens + and self.heteroatoms == other.heteroatoms) + + def __hash__(self): + return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, + self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) + + +class ListElement(ExtendedQuery): + __slots__ = ('_elements', '__dict__') + + def __init__(self, elements: List[str]): + """ + Elements list + """ + if not isinstance(elements, (list, tuple)) or not elements: + raise ValueError('invalid elements list') + super().__init__() + self._elements = tuple(elements) + + @property + def atomic_symbol(self) -> str: + return ','.join(self._elements) + + @cached_property + def atomic_numbers(self): + return tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in self._elements) + + def copy(self): + copy = super().copy() + copy._elements = self._elements + return copy + + def __eq__(self, other): + """ + Compare attached to molecules elements and query elements + """ + if isinstance(other, Element): + if other.atomic_number not in self.atomic_numbers: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if set(self.ring_sizes).isdisjoint(other.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: 
+ return False + return True + # List is subset of Any and List + elif (isinstance(other, (ListElement, AnyElement)) + and self.charge == other.charge + and self.is_radical == other.is_radical + and self.neighbors == other.neighbors + and self.hybridization == other.hybridization + and self.ring_sizes == other.ring_sizes + and self.implicit_hydrogens == other.implicit_hydrogens + and self.heteroatoms == other.heteroatoms): + # list should contain all elements of other list + if isinstance(other, ListElement): + return set(self.atomic_numbers).issubset(other.atomic_numbers) + return True + return False + + def __hash__(self): + return hash((self.atomic_numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, + self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) + + def __repr__(self): + return f'{self.__class__.__name__}([{",".join(self._elements)}])' + + +class QueryElement(ExtendedQuery, ABC): + __slots__ = ('_isotope',) + + def __init__(self, isotope: Optional[int]): + if isotope is not None and not isinstance(isotope, int): + raise TypeError('isotope must be an int') + super().__init__() + self._isotope = isotope + + def __repr__(self): + if self._isotope: + return f'{self.__class__.__name__}({self._isotope})' + return f'{self.__class__.__name__}()' + + @property + def atomic_symbol(self) -> str: + return self.__class__.__name__[5:] + + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + + @property + def isotope(self): + return self._isotope + + @classmethod + def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: + """ + get Element class by its symbol + """ + if symbol == 'A': + return AnyElement + elif symbol == 'M': + return AnyMetal + try: + element = next(x for x in QueryElement.__subclasses__() if x.__name__ == f'Query{symbol}') + except StopIteration: + raise ValueError(f'QueryElement with symbol "{symbol}" not found') + return element + + 
@classmethod + def from_atomic_number(cls, number: int) -> Type['QueryElement']: + """ + get Element class by its number + """ + try: + element = next(x for x in QueryElement.__subclasses__() if x.atomic_number.fget(None) == number) + except StopIteration: + raise ValueError(f'QueryElement with number "{number}" not found') + return element + + @classmethod + def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': + """ + get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement + """ + if isinstance(atom, Element): + # transfer true atomic props + query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) + query._charge = atom.charge + query._is_radical = atom.is_radical + return query + elif not isinstance(atom, Query): + raise TypeError('Element or Query expected') + return atom.copy() + + def copy(self): + copy = super().copy() + copy._isotope = self.isotope + return copy + + def __eq__(self, other): + """ + compare attached to molecules elements and query elements + """ + if isinstance(other, Element): + if self.atomic_number != other.atomic_number: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.isotope and self.isotope != other.isotope: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if set(self.ring_sizes).isdisjoint(other.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + elif (isinstance(other, ExtendedQuery) + and self.charge == other.charge + and self.is_radical == other.is_radical + and 
self.neighbors == other.neighbors + and self.hybridization == other.hybridization + and self.ring_sizes == other.ring_sizes + and self.implicit_hydrogens == other.implicit_hydrogens + and self.heteroatoms == other.heteroatoms): + # query element should fully match other query element + if isinstance(other, QueryElement): + return self.atomic_number == other.atomic_number and self.isotope == other.isotope + # query element is subset of any element + elif isinstance(other, AnyElement): + return True + # query element should be in list + return isinstance(other, ListElement) and self.atomic_number in other.atomic_numbers + return False + + def __hash__(self): + return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, + self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) + + +__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/element/core.py b/chython/periodictable/element/core.py deleted file mode 100644 index f5ab05ca..00000000 --- a/chython/periodictable/element/core.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2022 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from abc import ABC, abstractmethod -from typing import Optional, TypeVar -from weakref import ref -from ...exceptions import IsConnectedAtom, IsNotConnectedAtom - - -T = TypeVar('T') - - -class Core(ABC): - __slots__ = ('__isotope', '_graph', '_n') - - def __init__(self, isotope: Optional[int] = None): - self.__isotope = isotope - - def __repr__(self): - if self.__isotope: - return f'{self.__class__.__name__}({self.__isotope})' - return f'{self.__class__.__name__}()' - - def __getstate__(self): - return {'isotope': self.__isotope} - - def __setstate__(self, state): - self.__isotope = state['isotope'] - - @abstractmethod - def __hash__(self): - """ - Atom hash used in Morgan atom numbering algorithm. - """ - - @property - @abstractmethod - def atomic_symbol(self) -> str: - """ - Element symbol - """ - - @property - @abstractmethod - def atomic_number(self) -> int: - """ - Element number - """ - - @property - def isotope(self) -> Optional[int]: - """ - Isotope number - """ - return self.__isotope - - @property - def charge(self) -> int: - """ - Charge of atom - """ - try: - return self._graph()._charges[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def is_radical(self) -> bool: - """ - Radical state of atoms - """ - try: - return self._graph()._radicals[self._n] - except AttributeError: - raise IsNotConnectedAtom - - def copy(self: T) -> T: - """ - Detached from graph copy of element - """ - copy = object.__new__(self.__class__) - copy._Core__isotope = self.__isotope - return copy - - def _attach_graph(self, graph, n): - try: - self._graph - except AttributeError: - self._graph = ref(graph) - self._n = n - else: - raise IsConnectedAtom - - def _change_map(self, n): - try: - self._graph - except AttributeError: - raise IsNotConnectedAtom - else: - self._n = n - - -__all__ = ['Core'] diff --git a/chython/periodictable/element/query.py b/chython/periodictable/element/query.py deleted file mode 100644 index 94b9edca..00000000 --- 
a/chython/periodictable/element/query.py +++ /dev/null @@ -1,318 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2024 Ramil Nugmanov -# Copyright 2021 Dmitrij Zanadvornykh -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from abc import ABC -from typing import Tuple, Type, List, Union -from .core import Core -from .element import Element -from ...exceptions import IsNotConnectedAtom - - -_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', - 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} - - -class Query(Core, ABC): - __slots__ = () - - @property - def neighbors(self) -> Tuple[int, ...]: - try: - return self._graph()._neighbors[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def hybridization(self): - try: - return self._graph()._hybridizations[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def heteroatoms(self) -> Tuple[int, ...]: - try: - return self._graph()._heteroatoms[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def ring_sizes(self) -> Tuple[int, ...]: - """ - Atom rings sizes. 
- """ - try: - return self._graph()._rings_sizes[self._n] - except AttributeError: - raise IsNotConnectedAtom - except KeyError: - return () - - @property - def implicit_hydrogens(self) -> Tuple[int, ...]: - try: - return self._graph()._hydrogens[self._n] - except AttributeError: - raise IsNotConnectedAtom - - -class QueryElement(Query, ABC): - __slots__ = () - - @property - def atomic_symbol(self) -> str: - return self.__class__.__name__[5:] - - @classmethod - def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: - """ - get Element class by its symbol - """ - if symbol == 'A': - return AnyElement - elif symbol == 'M': - return AnyMetal - try: - element = next(x for x in QueryElement.__subclasses__() if x.__name__ == f'Query{symbol}') - except StopIteration: - raise ValueError(f'QueryElement with symbol "{symbol}" not found') - return element - - @classmethod - def from_atomic_number(cls, number: int) -> Type['QueryElement']: - """ - get Element class by its number - """ - try: - element = next(x for x in QueryElement.__subclasses__() if x.atomic_number.fget(None) == number) - except StopIteration: - raise ValueError(f'QueryElement with number "{number}" not found') - return element - - @classmethod - def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': - """ - get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement - """ - if isinstance(atom, Element): - return cls.from_atomic_number(atom.atomic_number)(atom.isotope) - elif not isinstance(atom, Query): - raise TypeError('Element or Query expected') - return atom.copy() - - def __eq__(self, other): - """ - compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if self.atomic_number == other.atomic_number and self.charge == other.charge and \ - self.is_radical == other.is_radical: - if self.isotope and self.isotope != other.isotope: - return False - if self.neighbors and 
other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, QueryElement) and self.atomic_number == other.atomic_number and \ - self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - # equal query element has equal query marks - return True - return False - - def __hash__(self): - return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, - self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - - -class AnyElement(Query): - __slots__ = () - - def __init__(self, *args, **kwargs): - super().__init__() - - @property - def atomic_symbol(self) -> str: - return 'A' - - @property - def atomic_number(self) -> int: - return 0 - - def __eq__(self, other): - """ - Compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if self.charge == other.charge and self.is_radical == other.is_radical: - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in 
ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, AnyMetal): - return False - elif isinstance(other, Query) and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - return True - return False - - def __hash__(self): - return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, self.ring_sizes, - self.implicit_hydrogens, self.heteroatoms)) - - -class AnyMetal(Query): - """ - Charge and radical ignored any metal. Rings, hydrogens and heteroatoms count also ignored. - - Class designed for d-elements matching in standardization. 
- """ - def __init__(self, *args, **kwargs): - super().__init__() - - @property - def atomic_symbol(self) -> str: - return 'M' - - @property - def atomic_number(self) -> int: - return 0 - - def __eq__(self, other): - if isinstance(other, Element): - if other.atomic_symbol not in _inorganic: - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - return True - elif isinstance(other, AnyMetal) and self.neighbors == other.neighbors \ - and self.hybridization == other.hybridization: - return True - return False - - def __hash__(self): - return hash((self.neighbors, self.hybridization)) - - -class ListElement(Query): - __slots__ = ('_elements', '_numbers') - - def __init__(self, elements: List[str], *args, **kwargs): - """ - Elements list - """ - super().__init__() - self._elements = tuple(elements) - self._numbers = tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in elements) - - @property - def atomic_symbol(self) -> str: - return ','.join(self._elements) - - @property - def atomic_number(self) -> int: - return 0 - - def copy(self): - copy = super().copy() - copy._elements = self._elements - copy._numbers = self._numbers - return copy - - def __eq__(self, other): - """ - Compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if other.atomic_number in self._numbers: - if self.charge != other.charge or self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in 
self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, (AnyElement, AnyMetal)): - return False - elif isinstance(other, Query) and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - if isinstance(other, ListElement): - return self._numbers == other._numbers - return other.atomic_number in self._numbers - return False - - def __hash__(self): - return hash((self._numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, - self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - - def __getstate__(self): - state = super().__getstate__() - state['elements'] = self._elements - return state - - def __setstate__(self, state): - self._elements = state['elements'] - self._numbers = tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() - if x.__name__ in state['elements']) - super().__setstate__(state) - - def __repr__(self): - return f'{self.__class__.__name__}([{",".join(self._elements)}])' - - -__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index 9b06949d..a7c10f55 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,9 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupI -from .periods import * +from .base import Element +from .base.groups import GroupI +from .base.periods import * class H(Element, PeriodI, GroupI): diff --git a/chython/periodictable/groupII.py b/chython/periodictable/groupII.py index 0df4a674..bae2cf65 100644 --- a/chython/periodictable/groupII.py +++ b/chython/periodictable/groupII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class Be(Element, PeriodII, GroupII): diff --git a/chython/periodictable/groupIII.py b/chython/periodictable/groupIII.py index 60c57630..a2683f8d 100644 --- a/chython/periodictable/groupIII.py +++ b/chython/periodictable/groupIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupIII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Sc(Element, PeriodIV, GroupIII): diff --git a/chython/periodictable/groupIV.py b/chython/periodictable/groupIV.py index cc22146a..c80e1482 100644 --- a/chython/periodictable/groupIV.py +++ b/chython/periodictable/groupIV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupIV -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIV +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Ti(Element, PeriodIV, GroupIV): diff --git a/chython/periodictable/groupIX.py b/chython/periodictable/groupIX.py index 6cf22449..97608fd9 100644 --- a/chython/periodictable/groupIX.py +++ b/chython/periodictable/groupIX.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupIX -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIX +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Co(Element, PeriodIV, GroupIX): diff --git a/chython/periodictable/groupV.py b/chython/periodictable/groupV.py index e923cec1..66036c63 100644 --- a/chython/periodictable/groupV.py +++ b/chython/periodictable/groupV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupV -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupV +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class V(Element, PeriodIV, GroupV): diff --git a/chython/periodictable/groupVI.py b/chython/periodictable/groupVI.py index 6fa24b94..03b76191 100644 --- a/chython/periodictable/groupVI.py +++ b/chython/periodictable/groupVI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Dayana Bashirova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVI -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVI +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Cr(Element, PeriodIV, GroupVI): diff --git a/chython/periodictable/groupVII.py b/chython/periodictable/groupVII.py index c66e89d9..3fceee40 100644 --- a/chython/periodictable/groupVII.py +++ b/chython/periodictable/groupVII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupVII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Mn(Element, PeriodIV, GroupVII): diff --git a/chython/periodictable/groupVIII.py b/chython/periodictable/groupVIII.py index 3d88324b..ea510d60 100644 --- a/chython/periodictable/groupVIII.py +++ b/chython/periodictable/groupVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVIII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVIII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Fe(Element, PeriodIV, GroupVIII): diff --git a/chython/periodictable/groupX.py b/chython/periodictable/groupX.py index 80a499a4..0ca6aa05 100644 --- a/chython/periodictable/groupX.py +++ b/chython/periodictable/groupX.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupX -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupX +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Ni(Element, PeriodIV, GroupX): diff --git a/chython/periodictable/groupXI.py b/chython/periodictable/groupXI.py index 40bc7c91..96be94af 100644 --- a/chython/periodictable/groupXI.py +++ b/chython/periodictable/groupXI.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXI -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXI +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Cu(Element, PeriodIV, GroupXI): diff --git a/chython/periodictable/groupXII.py b/chython/periodictable/groupXII.py index 7b48dfad..17a3e8cf 100644 --- a/chython/periodictable/groupXII.py +++ b/chython/periodictable/groupXII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Dayana Bashirova # This file is part of chython. 
# @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Zn(Element, PeriodIV, GroupXII): diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index dd5d728c..c0d3f507 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXIII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXIII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class B(Element, PeriodII, GroupXIII): diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index ae2be925..0a18f705 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXIV -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXIV +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class C(Element, PeriodII, GroupXIV): diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 52f9b545..218aeecc 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXV -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXV +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class N(Element, PeriodII, GroupXV): diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index fd060971..4791eb2a 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -19,9 +19,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXVI -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXVI +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class O(Element, PeriodII, GroupXVI): diff --git a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index 064722c2..da6ce4c0 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXVII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXVII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class F(Element, PeriodII, GroupXVII): diff --git a/chython/periodictable/groupXVIII.py b/chython/periodictable/groupXVIII.py index 692fd9b4..849a893c 100644 --- a/chython/periodictable/groupXVIII.py +++ b/chython/periodictable/groupXVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXVIII -from .periods import * +from .base import Element +from .base.groups import GroupXVIII +from .base.periods import * class He(Element, PeriodI, GroupXVIII): From 688a27a285b0f1ef70b2cfaed8797c86c0cadbc5 Mon Sep 17 00:00:00 2001 From: reginaib Date: Thu, 31 Oct 2024 16:29:13 +0100 Subject: [PATCH 02/68] saved --- chython/containers/graph.py | 89 +++--------- chython/containers/molecule.py | 194 ++++++++++--------------- chython/containers/query.py | 134 +---------------- chython/periodictable/base/__init__.py | 2 +- chython/periodictable/base/element.py | 73 ++++++---- chython/periodictable/base/query.py | 27 ++-- 6 files changed, 160 insertions(+), 359 deletions(-) diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 4d9ad441..17f7a175 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -29,25 +29,16 @@ class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): - __slots__ = ('_atoms', '_bonds', '_charges', '_radicals', '_atoms_stereo', '_cis_trans_stereo', '_allenes_stereo', - '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '_cis_trans_stereo', '__dict__', '__weakref__') __class_cache__ = {} _atoms: Dict[int, Atom] _bonds: Dict[int, Dict[int, Bond]] - _charges: Dict[int, int] - _radicals: Dict[int, bool] - _atoms_stereo: Dict[int, bool] - _allenes_stereo: Dict[int, bool] _cis_trans_stereo: Dict[Tuple[int, int], bool] def __init__(self): self._atoms = {} self._bonds = {} - self._charges = {} - self._radicals = {} - self._atoms_stereo = {} - self._allenes_stereo = {} self._cis_trans_stereo = {} def atom(self, n: int) -> Atom: @@ -99,7 +90,7 @@ def bonds_count(self) -> int: return sum(len(x) for x in self._bonds.values()) // 2 @abstractmethod - def add_atom(self, atom: Atom, n: Optional[int] = None, *, charge: int = 0, is_radical: bool = False) -> int: + def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: """ new atom addition """ @@ -109,19 +100,10 @@ def add_atom(self, atom: Atom, n: Optional[int] = None, *, charge: int = 0, is_r raise TypeError('mapping should be integer') elif n in self._atoms: raise MappingError('atom with same number exists') - elif not isinstance(is_radical, bool): - raise TypeError('bool expected') - elif not isinstance(charge, int): - raise TypeError('formal charge should be int in range [-4, 4]') - elif charge > 4 or charge < -4: - raise ValueError('formal charge should be in range [-4, 4]') - - atom._attach_graph(self, n) + self._atoms[n] = atom - self._charges[n] = charge - self._radicals[n] = is_radical self._bonds[n] = {} - self.__dict__.clear() + self.flush_cache() return n @abstractmethod @@ -137,7 +119,7 @@ def add_bond(self, n: int, m: int, bond: Bond): raise MappingError('atoms already bonded') self._bonds[n][m] = 
self._bonds[m][n] = bond - self.__dict__.clear() + self.flush_cache() @abstractmethod def copy(self): @@ -145,14 +127,16 @@ def copy(self): copy of graph """ copy = object.__new__(self.__class__) - copy._charges = self._charges.copy() - copy._radicals = self._radicals.copy() - - copy._atoms = ca = {} - for n, atom in self._atoms.items(): - atom = atom.copy() - ca[n] = atom - atom._attach_graph(copy, n) + copy._atoms = {n: atom.copy(full=True) for n, atom in self._atoms.items()} + + copy._bonds = cb = {} + for n, m_bond in self._bonds.items(): + cb[n] = cbn = {} + for m, bond in m_bond.items(): + if m in cb: # bond partially exists. need back-connection. + cbn[m] = cb[m][n] + else: + cbn[m] = bond.copy() return copy @abstractmethod @@ -168,56 +152,19 @@ def remap(self, mapping: Dict[int, int], *, copy=False): raise ValueError('mapping overlap') mg = mapping.get - sc = self._charges - sr = self._radicals - if copy: h = self.__class__() - ha = h._atoms - hc = h._charges - hr = h._radicals - has = h._atoms_stereo - hal = h._allenes_stereo + h._atoms = {mg(n, n): atom.copy(full=True) for n, atom in self._atoms.items()} hcs = h._cis_trans_stereo - - for n, atom in self._atoms.items(): - m = mg(n, n) - atom = atom.copy() - ha[m] = atom - atom._attach_graph(h, m) else: - ha = {} - hc = {} - hr = {} - has = {} - hal = {} + self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} hcs = {} - for n, atom in self._atoms.items(): - m = mg(n, n) - ha[m] = atom - atom._change_map(m) # change mapping number - - for n in self._atoms: - m = mg(n, n) - hc[m] = sc[n] - hr[m] = sr[n] - - for n, stereo in self._atoms_stereo.items(): - has[mg(n, n)] = stereo - for n, stereo in self._allenes_stereo.items(): - hal[mg(n, n)] = stereo for (n, m), stereo in self._cis_trans_stereo.items(): hcs[(mg(n, n), mg(m, m))] = stereo if copy: return h # noqa - - self._atoms = ha - self._charges = hc - self._radicals = hr - self._atoms_stereo = has - self._allenes_stereo = hal 
self._cis_trans_stereo = hcs self.flush_cache() return self diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 56d6987b..2c67fed2 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,7 +19,6 @@ from CachedMethods import cached_args_method from collections import Counter, defaultdict from functools import cached_property -from numpy import uint, zeros from typing import Dict, Iterable, List, Optional, Tuple, Union from weakref import ref from zlib import compress, decompress @@ -45,37 +44,29 @@ class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, X3domMolecule): - __slots__ = ('_plane', '_conformers', '_hydrogens', '_parsed_mapping', '_backup', '__meta', '__name') - - _conformers: List[Dict[int, Tuple[float, float, float]]] - _hydrogens: Dict[int, Optional[int]] - _parsed_mapping: Dict[int, int] - _plane: Dict[int, Tuple[float, float]] + __slots__ = ('_backup', '_meta', '_name', '_changed') def __init__(self): super().__init__() - self._conformers = [] - self._hydrogens = {} - self._parsed_mapping = {} - self._plane = {} - self.__meta = None - self.__name = None + self._meta = None + self._name = None + self._changed = None @property def meta(self) -> Dict: - if self.__meta is None: - self.__meta = {} # lazy - return self.__meta + if self._meta is None: + self._meta = {} # lazy + return self._meta @property def name(self) -> str: - return self.__name or '' + return self._name or '' @name.setter def name(self, name): if not isinstance(name, str): - raise TypeError('name should be string up to 80 symbols') - self.__name = name + raise 
TypeError('name should be a string preferably up to 80 symbols') + self._name = name def environment(self, atom: int, include_bond: bool = True, include_atom: bool = True) -> \ Tuple[Union[Tuple[int, Bond, Element], @@ -101,10 +92,9 @@ def environment(self, atom: int, include_bond: bool = True, include_atom: bool = return tuple(self._bonds[atom].items()) return tuple(self._bonds[atom]) - @cached_args_method def neighbors(self, n: int) -> int: """number of neighbors atoms excluding any-bonded""" - return sum(b.order != 8 for b in self._bonds[n].values()) + return self._atoms[n].neighbors @cached_args_method def hybridization(self, n: int) -> int: @@ -135,8 +125,7 @@ def heteroatoms(self, n: int) -> int: """ Number of neighbored heteroatoms (not carbon or hydrogen) except any-bond connected. """ - atoms = self._atoms - return sum(atoms[m].atomic_number not in (1, 6) for m, b in self._bonds[n].items() if b.order != 8) + return self._atoms[n].heteroatoms def implicit_hydrogens(self, n: int) -> Optional[int]: """ @@ -144,26 +133,23 @@ def implicit_hydrogens(self, n: int) -> Optional[int]: Returns None if count are ambiguous. """ - return self._hydrogens[n] + return self._atoms[n].implicit_hydrogens - @cached_args_method def explicit_hydrogens(self, n: int) -> int: """ Number of explicit hydrogen atoms connected to atom. Take into account any type of bonds with hydrogen atoms. """ - atoms = self._atoms - return sum(atoms[m].atomic_number == 1 for m in self._bonds[n]) + return self._atoms[n].explicit_hydrogens - @cached_args_method def total_hydrogens(self, n: int) -> int: """ Number of hydrogen atoms connected to atom. Take into account any type of bonds with hydrogen atoms. """ - return self._hydrogens[n] + self.explicit_hydrogens(n) + return self._atoms[n].total_hydrogens @cached_args_method def adjacency_matrix(self, set_bonds=False, /): @@ -172,6 +158,8 @@ def adjacency_matrix(self, set_bonds=False, /): :param set_bonds: if True set bond orders instead of 1. 
""" + from numpy import uint, zeros + adj = zeros((len(self), len(self)), dtype=uint) mapping = {n: x for x, n in enumerate(self._atoms)} if set_bonds: @@ -191,24 +179,25 @@ def molecular_charge(self) -> int: """ Total charge of molecule """ - return sum(self._charges.values()) + return sum(a.charge for a in self._atoms.values()) @cached_property def is_radical(self) -> bool: """ True if at least one atom is radical """ - return any(self._radicals.values()) + return any(a.is_radical for a in self._atoms.values()) @cached_property def molecular_mass(self) -> float: - return sum(x.atomic_mass for x in self._atoms.values()) + sum(self._hydrogens.values()) * H().atomic_mass + h = H().atomic_mass + return sum(a.atomic_mass + a.implicit_hydrogens * h for a in self._atoms.values()) @cached_property def brutto(self) -> Dict[str, int]: """Counted atoms dict""" - c = Counter(x.atomic_symbol for x in self._atoms.values()) - c['H'] += sum(self._hydrogens.values()) + c = Counter(a.atomic_symbol for a in self._atoms.values()) + c['H'] += sum(a.implicit_hydrogens for a in self._atoms.values()) return dict(c) @cached_property @@ -220,8 +209,7 @@ def aromatic_rings(self) -> Tuple[Tuple[int, ...], ...]: return tuple(ring for ring in self.sssr if bonds[ring[0]][ring[-1]] == 4 and all(bonds[n][m] == 4 for n, m in zip(ring, ring[1:]))) - def add_atom(self, atom: Union[Element, int, str], *args, charge=0, is_radical=False, - xy: Tuple[float, float] = (0., 0.), _skip_hydrogen_calculation=False, **kwargs): + def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=False, **kwargs): """ Add new atom. 
""" @@ -232,27 +220,17 @@ def add_atom(self, atom: Union[Element, int, str], *args, charge=0, is_radical=F atom = Element.from_atomic_number(atom)() else: raise TypeError('Element object expected') - if not isinstance(xy, tuple) or len(xy) != 2 or not isinstance(xy[0], float) or not isinstance(xy[1], float): - raise TypeError('XY should be tuple with 2 float') - - n = super().add_atom(atom, *args, charge=charge, is_radical=is_radical, **kwargs) - self._plane[n] = xy - self._conformers.clear() # clean conformers. need full recalculation for new system - if _skip_hydrogen_calculation: - self._hydrogens[n] = None - elif atom.atomic_number != 1: - try: - rules = atom.valence_rules(charge, is_radical, 0) - except ValenceError: - self._hydrogens[n] = None - else: - self._hydrogens[n] = rules[0][2] # first rule without neighbors + n = super().add_atom(atom, *args, **kwargs) + if self._changed is None: + self._changed = [n] else: - self._hydrogens[n] = 0 + self._changed.append(n) + if not _skip_calculation: + self.fix_labels() return n - def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_hydrogen_calculation=False): + def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): """ Connect atoms with bonds. @@ -263,21 +241,18 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_hydrogen_calculation=F if not isinstance(bond, Bond): bond = Bond(bond) - bond._attach_graph(self, n, m) super().add_bond(n, m, bond) - self._conformers.clear() # clean conformers. 
need full recalculation for new system - - if _skip_hydrogen_calculation: # skip stereo fixing too - return - - self._calc_implicit(n) - self._calc_implicit(m) - - if self._atoms[n].atomic_number != 1 and self._atoms[m].atomic_number != 1: # not hydrogen - # fix stereo if formed not to hydrogen bond - self.fix_stereo() + if bond.order == 8: + return # any bond doesn't change anything + if self._changed is None: + self._changed = [n, n] + else: + self._changed.append(n) + self._changed.append(m) + if not _skip_calculation: + self.fix_labels() - def delete_atom(self, n: int, *, _skip_hydrogen_calculation=False): + def delete_atom(self, n: int, *, _skip_calculation=False): """ Remove atom. @@ -285,25 +260,25 @@ def delete_atom(self, n: int, *, _skip_hydrogen_calculation=False): Implicit hydrogens marks will not be set if atoms in aromatic rings. Call `kekule()` and `thiele()` in sequence to fix marks. """ + atoms = self._atoms ngb = self._bonds.pop(n) - fix = self._atoms.pop(n).atomic_number != 1 and ngb and not _skip_hydrogen_calculation - - del self._charges[n] - del self._radicals[n] - del self._hydrogens[n] - del self._plane[n] + atom_n = atoms.pop(n) - for m in ngb: + for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] - if not _skip_hydrogen_calculation: + if bond.order == 8: + continue + if self._changed is None: + self._changed = [m] + else: + self._changed.append(m) + atom_m = atoms[m] + atom_m._neighbors -= 1 + if atom_n.atomic_number not in (1, 6): + atom_m._heteroatoms -= 1 + if not _skip_calculation: self._calc_implicit(m) - self._conformers.clear() # clean conformers. 
need full recalculation for new system - try: - del self._parsed_mapping[n] - except KeyError: - pass - if fix: # hydrogen atom not used for stereo coding self.fix_stereo() self.flush_cache() @@ -396,28 +371,13 @@ def remap(self, mapping: Dict[int, int], *, copy: bool = False) -> 'MoleculeCont def copy(self) -> 'MoleculeContainer': copy = super().copy() - - copy._bonds = cb = {} - for n, m_bond in self._bonds.items(): - cb[n] = cbn = {} - for m, bond in m_bond.items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] - else: - cbn[m] = bond = bond.copy() - bond._attach_graph(copy, n, m) - - copy._MoleculeContainer__name = self.__name - if self.__meta is None: - copy._MoleculeContainer__meta = None + copy._name = self._name + if self._meta is None: + copy._meta = None else: - copy._MoleculeContainer__meta = self.__meta.copy() - copy._plane = self._plane.copy() - copy._hydrogens = self._hydrogens.copy() + copy._meta = self._meta.copy() copy._parsed_mapping = self._parsed_mapping.copy() copy._conformers = [c.copy() for c in self._conformers] - copy._atoms_stereo = self._atoms_stereo.copy() - copy._allenes_stereo = self._allenes_stereo.copy() copy._cis_trans_stereo = self._cis_trans_stereo.copy() return copy @@ -951,7 +911,7 @@ def _cpack(self, order=None, check=True): def _augmented_substructure(self, atoms: Iterable[int], deep: int): atoms = set(atoms) bonds = self._bonds - if atoms - self._atoms.keys(): + if atoms - bonds.keys(): raise ValueError('invalid atom numbers') nodes = [atoms] for _ in range(deep): @@ -967,22 +927,20 @@ def _calc_implicit(self, n: int): """ atoms = self._atoms atom = atoms[n] - if (an := atom.atomic_number) == 1: # hydrogen nether has implicit H - self._hydrogens[n] = 0 + if atom.atomic_number == 1: # hydrogen nether has implicit H + atom._implicit_hydrogens = 0 return - charge: int = self._charges[n] - is_radical = self._radicals[n] explicit_sum = 0 explicit_dict = defaultdict(int) aroma = 0 for m, bond in 
self._bonds[n].items(): order = bond.order if order == 4: # only neutral carbon aromatic rings supported - if not charge and not is_radical and an == 6: + if not atom.charge and not atom.is_radical and atom.atomic_number == 6: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count - self._hydrogens[n] = None + atom._implicit_hydrogens = None return elif order != 8: # any bond used for complexes explicit_sum += order @@ -990,32 +948,32 @@ def _calc_implicit(self, n: int): if aroma == 2: if explicit_sum == 0: # H-Ar - self._hydrogens[n] = 1 + atom._implicit_hydrogens = 1 elif explicit_sum == 1: # R-Ar - self._hydrogens[n] = 0 + atom._implicit_hydrogens = 0 else: # invalid aromaticity - self._hydrogens[n] = None + atom._implicit_hydrogens = None return elif aroma == 3: # condensed rings if explicit_sum: # invalid aromaticity - self._hydrogens[n] = None + atom._implicit_hydrogens = None else: - self._hydrogens[n] = 0 + atom._implicit_hydrogens = 0 return elif aroma: - self._hydrogens[n] = None + atom._implicit_hydrogens = None return try: - rules = atom.valence_rules(charge, is_radical, explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: - self._hydrogens[n] = None + atom._implicit_hydrogens = None return for s, d, h in rules: if s.issubset(explicit_dict) and all(explicit_dict[k] >= c for k, c in d.items()): - self._hydrogens[n] = h + atom._implicit_hydrogens = h return - self._hydrogens[n] = None # rule not found + atom._implicit_hydrogens = None # rule not found def _check_implicit(self, n: int, h: int) -> bool: atoms = self._atoms @@ -1035,7 +993,7 @@ def _check_implicit(self, n: int, h: int) -> bool: explicit_dict[(order, atoms[m].atomic_number)] += 1 try: - rules = atom.valence_rules(self._charges[n], self._radicals[n], explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: return False for s, d, _h in rules: diff --git a/chython/containers/query.py b/chython/containers/query.py index 
abe4dcaf..5024e915 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -24,157 +24,35 @@ from ..algorithms.smiles import QuerySmiles from ..algorithms.stereo import Stereo from ..periodictable import Element, ListElement, QueryElement -from ..periodictable.element import Query - - -def _validate_neighbors(neighbors): - if neighbors is None: - neighbors = () - elif isinstance(neighbors, int): - if neighbors < 0 or neighbors > 14: - raise ValueError('neighbors should be in range [0, 14]') - neighbors = (neighbors,) - elif isinstance(neighbors, (tuple, list)): - if not all(isinstance(n, int) for n in neighbors): - raise TypeError('neighbors should be list or tuple of ints') - if any(n < 0 or n > 14 for n in neighbors): - raise ValueError('neighbors should be in range [0, 14]') - if len(set(neighbors)) != len(neighbors): - raise ValueError('neighbors should be unique') - neighbors = tuple(sorted(neighbors)) - else: - raise TypeError('neighbors should be int or list or tuple of ints') - return neighbors +from ..periodictable.base import Query class QueryContainer(Stereo, Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): - __slots__ = ('_neighbors', '_hybridizations', '_hydrogens', '_rings_sizes', '_heteroatoms', '_masked') - - _neighbors: Dict[int, Tuple[int, ...]] - _hybridizations: Dict[int, Tuple[int, ...]] - _hydrogens: Dict[int, Tuple[int, ...]] - _rings_sizes: Dict[int, Tuple[int, ...]] - _heteroatoms: Dict[int, Tuple[int, ...]] - _masked: Dict[int, bool] - - def __init__(self): - super().__init__() - self._neighbors = {} - self._hybridizations = {} - self._hydrogens = {} - self._rings_sizes = {} - self._heteroatoms = {} - self._masked = {} - - def add_atom(self, atom: Union[Query, Element, int, str], *args, - neighbors: Union[int, List[int], Tuple[int, ...], None] = None, - hybridization: Union[int, List[int], Tuple[int, ...], None] = None, - hydrogens: Union[int, List[int], Tuple[int, ...], None] = None, - rings_sizes: Union[int, 
List[int], Tuple[int, ...], None] = None, - heteroatoms: Union[int, List[int], Tuple[int, ...], None] = None, - masked: bool = False, **kwargs): - if hybridization is None: - hybridization = () - elif isinstance(hybridization, int): - if hybridization < 1 or hybridization > 4: - raise ValueError('hybridization should be in range [1, 4]') - hybridization = (hybridization,) - elif isinstance(hybridization, (tuple, list)): - if not all(isinstance(h, int) for h in hybridization): - raise TypeError('hybridizations should be list or tuple of ints') - if any(h < 1 or h > 4 for h in hybridization): - raise ValueError('hybridizations should be in range [1, 4]') - if len(set(hybridization)) != len(hybridization): - raise ValueError('hybridizations should be unique') - hybridization = tuple(sorted(hybridization)) - else: - raise TypeError('hybridization should be int or list or tuple of ints') - - if rings_sizes is None: - rings_sizes = () - elif isinstance(rings_sizes, int): - if rings_sizes < 3 and rings_sizes != 0: - raise ValueError('rings should be greater or equal 3. ring equal to zero is no ring atom mark') - rings_sizes = (rings_sizes,) - elif isinstance(rings_sizes, (tuple, list)): - if not all(isinstance(n, int) for n in rings_sizes): - raise TypeError('rings should be list or tuple of ints') - if any(n < 3 for n in rings_sizes): - raise ValueError('rings should be greater or equal 3') - if len(set(rings_sizes)) != len(rings_sizes): - raise ValueError('rings should be unique') - rings_sizes = tuple(sorted(rings_sizes)) - else: - raise TypeError('rings should be int or list or tuple of ints') - - neighbors = _validate_neighbors(neighbors) - hydrogens = _validate_neighbors(hydrogens) - heteroatoms = _validate_neighbors(heteroatoms) + __slots__ = () + def add_atom(self, atom: Union[Query, Element, int, str], *args, **kwargs): if not isinstance(atom, Query): + # set only basic labels: charge, radical, isotope. use Query object directly for the full control. 
if isinstance(atom, Element): - atom = QueryElement.from_atomic_number(atom.atomic_number)(atom.isotope) + atom = QueryElement.from_atom(atom) elif isinstance(atom, str): atom = QueryElement.from_symbol(atom)() elif isinstance(atom, int): atom = QueryElement.from_atomic_number(atom)() else: raise TypeError('QueryElement object expected') - - n = super().add_atom(atom, *args, **kwargs) - self._neighbors[n] = neighbors - self._hybridizations[n] = hybridization - self._hydrogens[n] = hydrogens - self._rings_sizes[n] = rings_sizes - self._heteroatoms[n] = heteroatoms - self._masked[n] = masked - return n + return super().add_atom(atom, *args, **kwargs) def add_bond(self, n, m, bond: Union[QueryBond, Bond, int, Tuple[int, ...]]): if isinstance(bond, Bond): bond = QueryBond.from_bond(bond) elif not isinstance(bond, QueryBond): bond = QueryBond(bond) - - sct = self._stereo_cis_trans_paths # save - sa = self._stereo_allenes_paths - super().add_bond(n, m, bond) - # remove stereo marks on bonded atoms and all its bonds - if n in self._atoms_stereo: - del self._atoms_stereo[n] - if m in self._atoms_stereo: - del self._atoms_stereo[m] - if self._cis_trans_stereo: - for nm, path in sct.items(): - if (n in path or m in path) and nm in self._cis_trans_stereo: - del self._cis_trans_stereo[nm] - if self._allenes_stereo: - for c, path in sa.items(): - if (n in path or m in path) and c in self._allenes_stereo: - del self._allenes_stereo[c] def copy(self) -> 'QueryContainer': copy = super().copy() - - copy._bonds = cb = {} - for n, m_bond in self._bonds.items(): - cb[n] = cbn = {} - for m, bond in m_bond.items(): - if m in cb: # bond partially exists. need back-connection. 
- cbn[m] = cb[m][n] - else: - cbn[m] = bond.copy() - - copy._neighbors = self._neighbors.copy() - copy._hybridizations = self._hybridizations.copy() - copy._hydrogens = self._hydrogens.copy() - copy._heteroatoms = self._heteroatoms.copy() - copy._rings_sizes = self._rings_sizes.copy() - copy._atoms_stereo = self._atoms_stereo.copy() - copy._allenes_stereo = self._allenes_stereo.copy() copy._cis_trans_stereo = self._cis_trans_stereo.copy() - copy._masked = self._masked.copy() return copy def union(self, other: 'QueryContainer', *, remap: bool = False, copy: bool = True) -> 'QueryContainer': diff --git a/chython/periodictable/base/__init__.py b/chython/periodictable/base/__init__.py index f63b3bb6..f8ca87e8 100644 --- a/chython/periodictable/base/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -21,4 +21,4 @@ from .query import * -__all__ = ['Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index c3703336..d1c1edd0 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -24,7 +24,9 @@ class Element(ABC): - __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens') + __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', + '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_xyz', + '_neighbors', '_heteroatoms', '_hybridization') __class_cache__ = {} def __init__(self, isotope: Optional[int] = None): @@ -43,6 +45,11 @@ def __init__(self, isotope: Optional[int] = None): self._is_radical = False self._x = self._y = 0 self._implicit_hydrogens = None + self._explicit_hydrogens = 0 + self._neighbors = 0 + self._heteroatoms = 0 + self._hybridization = 1 + self._stereo = None def __repr__(self): if self._isotope: @@ -183,45 +190,33 @@ 
def xy(self, value: Tuple[float, float]): def implicit_hydrogens(self) -> Optional[int]: return self._implicit_hydrogens - def copy(self): - copy = object.__new__(self.__class__) - copy._isotope = self.isotope - copy._charge = self.charge - copy._is_radical = self.is_radical - return copy - - def __copy__(self): - return self.copy() - @property def explicit_hydrogens(self) -> int: - try: - return self._graph().explicit_hydrogens(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._explicit_hydrogens @property def total_hydrogens(self) -> int: - if self._implicit_hydrogens is None: + if self.implicit_hydrogens is None: raise ValenceError - return self._implicit_hydrogens + self.explicit_hydrogens + return self.implicit_hydrogens + self.explicit_hydrogens + + @property + def stereo(self): + """ + Tetrahedron or allene stereo label + """ + return self._stereo @property def heteroatoms(self) -> int: - try: - return self._graph().heteroatoms(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._heteroatoms @property def neighbors(self) -> int: """ Neighbors count of atom """ - try: - return self._graph().neighbors(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._neighbors @property def hybridization(self): @@ -230,10 +225,26 @@ def hybridization(self): of single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or two double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. 
""" - try: - return self._graph().hybridization(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._hybridization + + def copy(self, full=False): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + if full: + copy._x = self.x + copy._y = self.y + copy._implicit_hydrogens = self.implicit_hydrogens + copy._explicit_hydrogens = self.explicit_hydrogens + copy._stereo = self.stereo + copy._neighbors = self.neighbors + copy._heteroatoms = self.heteroatoms + copy._hybridization = self.hybridization + return copy + + def __copy__(self): + return self.copy() @property def ring_sizes(self) -> Tuple[int, ...]: @@ -302,13 +313,13 @@ def __eq__(self, other): def __hash__(self): return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.implicit_hydrogens or 0)) - def valence_rules(self, charge: int, is_radical: bool, valence: int) -> \ + def valence_rules(self, valence: int) -> \ List[Tuple[Set[Tuple[int, 'Element']], Dict[Tuple[int, 'Element'], int], int]]: """ valence rules for element with specific charge/radical state """ try: - return self._compiled_valence_rules[(charge, is_radical, valence)] + return self._compiled_valence_rules[(self.charge, self.is_radical, valence)] except KeyError: raise ValenceError diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 5ae7adb5..2cc55367 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -47,12 +47,13 @@ def _validate(value, prop): class Query(ABC): - __slots__ = ('_neighbors', '_hybridization', '_masked') + __slots__ = ('_neighbors', '_hybridization', '_masked', '_stereo') def __init__(self): self._neighbors = () self._hybridization = () self._masked = False + self._stereo = None @property def neighbors(self) -> Tuple[int, ...]: @@ -95,11 +96,17 @@ def masked(self, value): raise TypeError('masked should be bool') 
self._masked = value - def copy(self): + @property + def stereo(self): + return self._stereo + + def copy(self, full=False): copy = object.__new__(self.__class__) copy._neighbors = self.neighbors copy._hybridization = self.hybridization - copy._masked = self.masked + if full: + copy._masked = self.masked + copy._stereo = self.stereo return copy def __copy__(self): @@ -190,8 +197,8 @@ def ring_sizes(self, value): else: raise TypeError('rings should be int or list or tuple of ints') - def copy(self): - copy = super().copy() + def copy(self, full=False): + copy = super().copy(full=full) copy._charge = self.charge copy._is_radical = self.is_radical copy._heteroatoms = self.heteroatoms @@ -296,8 +303,8 @@ def atomic_symbol(self) -> str: def atomic_numbers(self): return tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in self._elements) - def copy(self): - copy = super().copy() + def copy(self, full=False): + copy = super().copy(full=full) copy._elements = self._elements return copy @@ -353,7 +360,7 @@ def __repr__(self): class QueryElement(ExtendedQuery, ABC): __slots__ = ('_isotope',) - def __init__(self, isotope: Optional[int]): + def __init__(self, isotope: Optional[int] = None): if isotope is not None and not isinstance(isotope, int): raise TypeError('isotope must be an int') super().__init__() @@ -420,8 +427,8 @@ def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': raise TypeError('Element or Query expected') return atom.copy() - def copy(self): - copy = super().copy() + def copy(self, full=False): + copy = super().copy(full=full) copy._isotope = self.isotope return copy From 9430396318951b9e56ac7cae0ed8acef7ad18a42 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 11:45:20 +0100 Subject: [PATCH 03/68] Refactor and clean up molecule and bond handling Refactored molecule.py, bonds.py, graph.py, and query.py for improved clarity and efficiency. 
Removed unused methods and redundant code, consolidated bond handling logic, and replaced lists with sets for tracking changes. --- chython/containers/bonds.py | 162 ++++++++++++----------- chython/containers/graph.py | 73 ++--------- chython/containers/molecule.py | 178 ++++---------------------- chython/containers/query.py | 157 +---------------------- chython/periodictable/base/element.py | 50 ++++---- chython/periodictable/base/query.py | 6 +- pyproject.toml | 2 +- 7 files changed, 145 insertions(+), 483 deletions(-) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index cb61af29..e6014c1e 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,94 +17,74 @@ # along with this program; if not, see . # from typing import Optional, Tuple, Union, List, Set -from weakref import ref -from ..exceptions import IsConnectedBond, IsNotConnectedBond class Bond: - __slots__ = ('__order', '__graph', '__n', '__m') + __slots__ = ('_order', '_in_ring', '_stereo') def __init__(self, order: int): if not isinstance(order, int): raise TypeError('invalid order value') elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') - self.__order = order + self._order = order + self._in_ring = False + self._stereo = None def __eq__(self, other): if isinstance(other, Bond): - return self.__order == other.order + return self.order == other.order elif isinstance(other, int): - return self.__order == other + return self.order == other return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order})' + return f'{self.__class__.__name__}({self.order})' def __int__(self): """ Bond order. """ - return self.__order + return self.order def __hash__(self): """ Bond order. 
Used in Morgan atoms ordering. """ - return self.__order - - def __getstate__(self): - return {'order': self.__order} - - def __setstate__(self, state): - self.__order = state['order'] + return self.order @property def order(self) -> int: - return self.__order + return self._order + + @property + def stereo(self) -> Optional[bool]: + return self._stereo @property def in_ring(self) -> bool: - try: - return self.__graph().is_ring_bond(self.__n, self.__m) - except AttributeError: - raise IsNotConnectedBond + return self._in_ring - def copy(self) -> 'Bond': + def copy(self, full=False) -> 'Bond': copy = object.__new__(self.__class__) - copy._Bond__order = self.__order + copy._order = self.order + if full: + copy._stereo = self.stereo + copy._in_ring = self.in_ring return copy + def __copy__(self): + return self.copy() + @classmethod def from_bond(cls, bond): - if isinstance(bond, cls): - copy = object.__new__(cls) - copy._Bond__order = bond.order - return copy + if isinstance(bond, Bond): + return cls(bond.order) raise TypeError('Bond expected') - def _attach_graph(self, graph, n, m): - try: - self.__graph - except AttributeError: - self.__graph = ref(graph) - self.__n = n - self.__m = m - else: - raise IsConnectedBond - - def _change_map(self, n, m): - try: - self.__graph - except AttributeError: - raise IsNotConnectedBond - else: - self.__n = n - self.__m = m - class DynamicBond: - __slots__ = ('__order', '__p_order') + __slots__ = ('_order', '_p_order') def __init__(self, order=None, p_order=None): if order is None: @@ -118,16 +98,16 @@ def __init__(self, order=None, p_order=None): if order not in (1, 4, 2, 3, None, 8) or p_order not in (1, 4, 2, 3, None, 8): raise ValueError('order or p_order should be from [1, 2, 3, 4, 8]') - self.__order = order - self.__p_order = p_order + self._order = order + self._p_order = p_order def __eq__(self, other): if isinstance(other, DynamicBond): - return self.__order == other.order and self.__p_order == other.p_order + return 
self.order == other.order and self.p_order == other.p_order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order}, {self.__p_order})' + return f'{self.__class__.__name__}({self.order}, {self.p_order})' def __int__(self): """ @@ -139,47 +119,51 @@ def __hash__(self): """ Hash of bond orders. """ - return hash((self.__order or 0, self.__p_order or 0)) + return hash((self.order or 0, self.p_order or 0)) @property def is_dynamic(self) -> bool: """ Bond has dynamic features """ - return self.__order != self.__p_order + return self.order != self.p_order @property def order(self) -> Optional[int]: - return self.__order + return self._order @property def p_order(self) -> Optional[int]: - return self.__p_order + return self._p_order def copy(self) -> 'DynamicBond': copy = object.__new__(self.__class__) - copy._DynamicBond__order = self.__order - copy._DynamicBond__p_order = self.__p_order + copy._order = self.order + copy._p_order = self.p_order return copy + def __copy__(self): + return self.copy() + @classmethod def from_bond(cls, bond): if isinstance(bond, Bond): copy = object.__new__(cls) - copy._DynamicBond__order = copy._DynamicBond__p_order = bond.order + copy._order = copy._p_order = bond.order return copy elif isinstance(bond, cls): copy = object.__new__(cls) - copy._DynamicBond__order = bond.order - copy._DynamicBond__p_order = bond.p_order + copy._order = bond.order + copy._p_order = bond.p_order return copy raise TypeError('DynamicBond expected') class QueryBond: - __slots__ = ('__order', '__in_ring') + __slots__ = ('_order', '_in_ring', '_stereo') - def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], in_ring: Optional[bool] = None): + def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], + in_ring: Optional[bool] = None, stereo: Optional[bool] = None): if isinstance(order, (list, tuple, set)): if not all(isinstance(x, int) for x in order): raise TypeError('invalid order value') @@ 
-194,63 +178,75 @@ def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], in_r raise TypeError('invalid order value') if in_ring is not None and not isinstance(in_ring, bool): raise TypeError('in_ring mark should be boolean or None') - self.__order = order - self.__in_ring = in_ring + if stereo is not None and not isinstance(stereo, bool): + raise TypeError('stereo mark should be boolean or None') + self._order = order + self._in_ring = in_ring + self._stereo = stereo def __eq__(self, other): if isinstance(other, Bond): - if self.__in_ring is not None: - if self.__in_ring != other.in_ring: + if self.in_ring is not None: + if self.in_ring != other.in_ring: return False - return other.order in self.__order + return other.order in self.order elif isinstance(other, QueryBond): - return self.__order == other.order and self.__in_ring == other.in_ring + return self.order == other.order and self.in_ring == other.in_ring elif isinstance(other, int): - return other in self.__order + return other in self.order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order}, {self.__in_ring})' + return f'{self.__class__.__name__}({self.order}, {self.in_ring})' def __int__(self): """ Simple bond order or hash of sorted tuple of orders. """ - if len(self.__order) == 1: - return self.__order[0] - return hash(self.__order) + if len(self.order) == 1: + return self.order[0] + return hash(self.order) def __hash__(self): """ Hash of orders and cycle mark. Used in Morgan atoms ordering. 
""" - return hash((self.__order, self.__in_ring)) + return hash((self.order, self.in_ring)) @property def order(self) -> Tuple[int, ...]: - return self.__order + return self._order @property def in_ring(self) -> Optional[bool]: - return self.__in_ring + return self._in_ring + + @property + def stereo(self): + return self._stereo - def copy(self) -> 'QueryBond': + def copy(self, full=False) -> 'QueryBond': copy = object.__new__(self.__class__) - copy._QueryBond__order = self.__order - copy._QueryBond__in_ring = self.__in_ring + copy._order = self.order + copy._in_ring = self.in_ring + if full: + copy._stereo = self.stereo return copy + def __copy__(self): + return self.copy() + @classmethod def from_bond(cls, bond): if isinstance(bond, Bond): copy = object.__new__(cls) - copy._QueryBond__order = (bond.order,) - copy._QueryBond__in_ring = None + copy._order = (bond.order,) + copy._in_ring = None return copy elif isinstance(bond, cls): copy = object.__new__(cls) - copy._QueryBond__order = bond.order - copy._QueryBond__in_ring = bond.in_ring + copy._order = bond.order + copy._in_ring = bond.in_ring return copy raise TypeError('QueryBond or Bond expected') diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 17f7a175..54470b35 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -29,17 +29,15 @@ class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): - __slots__ = ('_atoms', '_bonds', '_cis_trans_stereo', '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '__dict__') __class_cache__ = {} _atoms: Dict[int, Atom] _bonds: Dict[int, Dict[int, Bond]] - _cis_trans_stereo: Dict[Tuple[int, int], bool] def __init__(self): self._atoms = {} self._bonds = {} - self._cis_trans_stereo = {} def atom(self, n: int) -> Atom: return self._atoms[n] @@ -121,14 +119,12 @@ def add_bond(self, n: int, m: int, bond: Bond): self._bonds[n][m] = self._bonds[m][n] = bond self.flush_cache() - @abstractmethod def copy(self): """ copy of graph 
""" copy = object.__new__(self.__class__) copy._atoms = {n: atom.copy(full=True) for n, atom in self._atoms.items()} - copy._bonds = cb = {} for n, m_bond in self._bonds.items(): cb[n] = cbn = {} @@ -139,63 +135,39 @@ def copy(self): cbn[m] = bond.copy() return copy - @abstractmethod - def remap(self, mapping: Dict[int, int], *, copy=False): + def remap(self, mapping: Dict[int, int]): """ Change atom numbers :param mapping: mapping of old numbers to the new - :param copy: keep original graph """ if len(mapping) != len(set(mapping.values())) or \ not (self._atoms.keys() - mapping.keys()).isdisjoint(mapping.values()): raise ValueError('mapping overlap') mg = mapping.get - if copy: - h = self.__class__() - h._atoms = {mg(n, n): atom.copy(full=True) for n, atom in self._atoms.items()} - hcs = h._cis_trans_stereo - else: - self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} - hcs = {} - - for (n, m), stereo in self._cis_trans_stereo.items(): - hcs[(mg(n, n), mg(m, m))] = stereo - - if copy: - return h # noqa - self._cis_trans_stereo = hcs + self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} + self._bonds = {mg(n, n): {mg(m, m): bond for m, bond in m_bond.items()} for n, m_bond in self._bonds.items()} self.flush_cache() - return self - @abstractmethod def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): """ Merge Graphs into one. :param remap: if atoms has collisions then remap other graph atoms else raise exception. 
- :param copy: keep original structure and return new object + :param copy: keep original structure and return a new object """ if self._atoms.keys() & other._atoms.keys(): - if remap: - other = other.remap({n: i for i, n in enumerate(other, start=max(self._atoms) + 1)}, copy=True) - else: + if not remap: raise MappingError('mapping of graphs is not disjoint') - + other = other.copy() + other.remap({n: i for i, n in enumerate(other, start=max(self._atoms) + 1)}) + else: + other = other.copy() # make a copy u = self.copy() if copy else self - u._charges.update(other._charges) - u._radicals.update(other._radicals) - - ua = u._atoms - for n, atom in other._atoms.items(): - ua[n] = atom = atom.copy() - atom._attach_graph(u, n) - - u._atoms_stereo.update(other._atoms_stereo) - u._allenes_stereo.update(other._allenes_stereo) - u._cis_trans_stereo.update(other._cis_trans_stereo) - return u, other + u._atoms.update(other._atoms) + u._bonds.update(other._bonds) + return u def flush_cache(self): self.__dict__.clear() @@ -224,24 +196,5 @@ def __iter__(self) -> Iterator[int]: def __bool__(self): return bool(self._atoms) - def __getstate__(self): - state = {'atoms': self._atoms, 'bonds': self._bonds, 'charges': self._charges, - 'radicals': self._radicals} - from chython import pickle_cache - - if pickle_cache: - state['cache'] = {k: v for k, v in self.__dict__.items() if k != '__cached_method___hash__'} - return state - - def __setstate__(self, state): - self._atoms = state['atoms'] - for n, a in state['atoms'].items(): - a._attach_graph(self, n) - self._charges = state['charges'] - self._radicals = state['radicals'] - self._bonds = state['bonds'] - if 'cache' in state: - self.__dict__.update(state['cache']) - __all__ = ['Graph'] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 2c67fed2..a4b5c8ef 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -105,20 +105,7 @@ def hybridization(self, n: int) -> int: of 
single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or two and more double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. """ - hybridization = 1 - for bond in self._bonds[n].values(): - order = bond.order - if order == 4: - return 4 - elif order == 3: - if hybridization != 3: - hybridization = 3 - elif order == 2: - if hybridization == 1: - hybridization = 2 - elif hybridization == 2: - hybridization = 3 - return hybridization + return self._atoms[n].hybridization @cached_args_method def heteroatoms(self, n: int) -> int: @@ -223,9 +210,9 @@ def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=Fals n = super().add_atom(atom, *args, **kwargs) if self._changed is None: - self._changed = [n] + self._changed = {n} else: - self._changed.append(n) + self._changed.add(n) if not _skip_calculation: self.fix_labels() return n @@ -245,10 +232,10 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): if bond.order == 8: return # any bond doesn't change anything if self._changed is None: - self._changed = [n, n] + self._changed = {n, m} else: - self._changed.append(n) - self._changed.append(m) + self._changed.add(n) + self._changed.add(m) if not _skip_calculation: self.fix_labels() @@ -260,30 +247,19 @@ def delete_atom(self, n: int, *, _skip_calculation=False): Implicit hydrogens marks will not be set if atoms in aromatic rings. Call `kekule()` and `thiele()` in sequence to fix marks. 
""" - atoms = self._atoms - ngb = self._bonds.pop(n) - atom_n = atoms.pop(n) - + del self._atoms[n] for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] if bond.order == 8: continue if self._changed is None: - self._changed = [m] + self._changed = {m} else: - self._changed.append(m) - atom_m = atoms[m] - atom_m._neighbors -= 1 - if atom_n.atomic_number not in (1, 6): - atom_m._heteroatoms -= 1 - if not _skip_calculation: - self._calc_implicit(m) - - if fix: # hydrogen atom not used for stereo coding - self.fix_stereo() - self.flush_cache() - - def delete_bond(self, n: int, m: int, *, _skip_hydrogen_calculation=False): + self._changed.add(m) + if not _skip_calculation: + self.fix_labels() + + def delete_bond(self, n: int, m: int, *, _skip_calculation=False): """ Disconnect atoms. @@ -292,82 +268,14 @@ def delete_bond(self, n: int, m: int, *, _skip_hydrogen_calculation=False): Call `kekule()` and `thiele()` in sequence to fix marks. """ del self._bonds[n][m] - del self._bonds[m][n] - self._conformers.clear() # clean conformers. 
need full recalculation for new system - - if not _skip_hydrogen_calculation: - self._calc_implicit(n) - self._calc_implicit(m) - - if self._atoms[n].atomic_number != 1 and self._atoms[m].atomic_number != 1 and not _skip_hydrogen_calculation: - self.fix_stereo() - self.flush_cache() - - def remap(self, mapping: Dict[int, int], *, copy: bool = False) -> 'MoleculeContainer': - atoms = self._atoms # keep original atoms dict - h = super().remap(mapping, copy=copy) - - mg = mapping.get - sp = self._plane - shg = self._hydrogens - - if copy: - h._MoleculeContainer__name = self.__name - if self.__meta is not None: - h._MoleculeContainer__meta = self.__meta.copy() - hb = h._bonds - hp = h._plane - hhg = h._hydrogens - hcf = h._conformers - hm = h._parsed_mapping - - # deep copy of bonds - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond = bond.copy() - bond._attach_graph(h, n, m) - else: - hb = {} - hp = {} - hhg = {} - hcf = [] - hm = {} - - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. 
- hbn[m] = hb[m][n] - else: - hbn[m] = bond - bond._change_map(n, m) - - for n in atoms: - m = mg(n, n) - hp[m] = sp[n] - hhg[m] = shg[n] - - hcf.extend({mg(n, n): x for n, x in c.items()} for c in self._conformers) - for n, m in self._parsed_mapping.items(): - hm[mg(n, n)] = m - - if copy: - return h - - self._bonds = hb - self._plane = hp - self._hydrogens = hhg - self._conformers = hcf - self._parsed_mapping = hm - return self + if self._bonds[m].pop(n).order != 8: + if self._changed is None: + self._changed = {n, m} + else: + self._changed.add(n) + self._changed.add(m) + if not _skip_calculation: + self.fix_labels() def copy(self) -> 'MoleculeContainer': copy = super().copy() @@ -376,32 +284,12 @@ def copy(self) -> 'MoleculeContainer': copy._meta = None else: copy._meta = self._meta.copy() - copy._parsed_mapping = self._parsed_mapping.copy() - copy._conformers = [c.copy() for c in self._conformers] - copy._cis_trans_stereo = self._cis_trans_stereo.copy() return copy def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = True) -> 'MoleculeContainer': if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') - u, o = super().union(other, remap=remap, copy=copy) - - ub = u._bonds - for n, m_bond in o._bonds.items(): - ub[n] = ubn = {} - for m, bond in m_bond.items(): - if m in ub: # bond partially exists. need back-connection. 
- ubn[m] = ub[m][n] - else: - ubn[m] = bond = bond.copy() - bond._attach_graph(u, n, m) - - u._MoleculeContainer__name = u._MoleculeContainer__meta = None - u._conformers.clear() - u._plane.update(o._plane) - u._hydrogens.update(o._hydrogens) - u._parsed_mapping.update(o._parsed_mapping) - return u + return super().union(other, remap=remap, copy=copy) def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalculate_hydrogens=True, skip_neighbors_marks=False, skip_hybridizations_marks=False, skip_hydrogens_marks=False, @@ -1078,27 +966,5 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.flush_cache() del self._backup - def __getstate__(self): - return {'conformers': self._conformers, 'hydrogens': self._hydrogens, 'atoms_stereo': self._atoms_stereo, - 'allenes_stereo': self._allenes_stereo, 'cis_trans_stereo': self._cis_trans_stereo, - 'parsed_mapping': self._parsed_mapping, 'meta': self.__meta, 'name': self.__name, - 'plane': self._plane, **super().__getstate__()} - - def __setstate__(self, state): - super().__setstate__(state) - self._conformers = state['conformers'] - self._atoms_stereo = state['atoms_stereo'] - self._allenes_stereo = state['allenes_stereo'] - self._cis_trans_stereo = state['cis_trans_stereo'] - self._hydrogens = state['hydrogens'] - self._parsed_mapping = state['parsed_mapping'] - self._plane = state['plane'] - self.__meta = state['meta'] - self.__name = state['name'] - - # attach bonds to graph - for n, m, b in self.bonds(): - b._attach_graph(self, n, m) - __all__ = ['MoleculeContainer'] diff --git a/chython/containers/query.py b/chython/containers/query.py index 5024e915..7a218786 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,14 +16,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from itertools import chain, product -from typing import Dict, List, Tuple, Union +from typing import Tuple, Union from .bonds import Bond, QueryBond from .graph import Graph from ..algorithms.isomorphism import QueryIsomorphism from ..algorithms.smiles import QuerySmiles from ..algorithms.stereo import Stereo -from ..periodictable import Element, ListElement, QueryElement +from ..periodictable import Element, QueryElement from ..periodictable.base import Query @@ -50,158 +49,10 @@ def add_bond(self, n, m, bond: Union[QueryBond, Bond, int, Tuple[int, ...]]): bond = QueryBond(bond) super().add_bond(n, m, bond) - def copy(self) -> 'QueryContainer': - copy = super().copy() - copy._cis_trans_stereo = self._cis_trans_stereo.copy() - return copy - def union(self, other: 'QueryContainer', *, remap: bool = False, copy: bool = True) -> 'QueryContainer': if not isinstance(other, QueryContainer): raise TypeError('QueryContainer expected') - u, o = super().union(other, remap=remap, copy=copy) - - ub = u._bonds - for n, m_bond in o._bonds.items(): - ub[n] = ubn = {} - for m, bond in m_bond.items(): - if m in ub: # bond partially exists. need back-connection. 
- ubn[m] = ub[m][n] - else: - ubn[m] = bond.copy() - - u._neighbors.update(o._neighbors) - u._hybridizations.update(o._hybridizations) - u._hydrogens.update(o._hydrogens) - u._rings_sizes.update(o._rings_sizes) - u._heteroatoms.update(o._heteroatoms) - u._masked.update(o._masked) - return u - - def remap(self, mapping: Dict[int, int], *, copy=False) -> 'QueryContainer': - atoms = self._atoms # keep original atoms dict - h = super().remap(mapping, copy=copy) - - mg = mapping.get - hydrogens = self._hydrogens - neighbors = self._neighbors - hybridizations = self._hybridizations - heteroatoms = self._heteroatoms - rings_sizes = self._rings_sizes - masked = self._masked - - if copy: - hb = h._bonds - hhg = h._hydrogens - hn = h._neighbors - hh = h._hybridizations - hx = h._heteroatoms - hrs = h._rings_sizes - hm = h._masked - - # deep copy of bonds - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond.copy() - else: - hb = {} - hhg = {} - hn = {} - hh = {} - hx = {} - hrs = {} - hm = {} - - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond - - for n in atoms: - m = mg(n, n) - hhg[m] = hydrogens[n] - hn[m] = neighbors[n] - hh[m] = hybridizations[n] - hx[m] = heteroatoms[n] - hrs[m] = rings_sizes[n] - hm[m] = masked[n] - - if copy: - return h # noqa - - self._bonds = hb - self._hydrogens = hhg - self._neighbors = hn - self._hybridizations = hh - self._heteroatoms = hx - self._rings_sizes = hrs - self._masked = hm - return self - - def enumerate_queries(self, *, enumerate_marks: bool = False): - """ - Enumerate complex queries into multiple simple ones. For example `[N,O]-C` into `NC` and `OC`. 
- - :param enumerate_marks: enumerate multiple marks to separate queries - """ - atoms = [(n, a._numbers) for n, a in self._atoms.items() if isinstance(a, ListElement)] - bonds = [(n, m, b.order) for n, m, b in self.bonds() if len(b.order) > 1] - for combo in product(*(x for *_, x in chain(atoms, bonds))): - copy = self.copy() - for (n, _), a in zip(atoms, combo): - copy._atoms[n] = a = QueryElement.from_atomic_number(a)() - a._attach_graph(copy, n) - for (n, m, _), b in zip(bonds, combo[len(atoms):]): - copy._bonds[n][m]._QueryBond__order = (b,) # noqa - - if enumerate_marks: - c = 0 - slices = [] - data = [] - for attr in ('_neighbors', '_hybridizations', '_hydrogens', '_heteroatoms', '_rings_sizes'): - tmp = [(n, v) for n, v in getattr(self, attr).items() if len(v) > 1] - if tmp: - data.extend(tmp) - slices.append((attr, c, c + len(tmp))) - c += len(tmp) - - for combo2 in product(*(x for _, x in data)): - copy2 = copy.copy() - for attr, i, j in slices: - attr = getattr(copy2, attr) - for (n, _), v in zip(data[i: j], combo2[i: j]): - attr[n] = (v,) - yield copy2 - else: - yield copy - - def __getstate__(self): - return {'atoms_stereo': self._atoms_stereo, 'allenes_stereo': self._allenes_stereo, - 'cis_trans_stereo': self._cis_trans_stereo, 'neighbors': self._neighbors, - 'hybridizations': self._hybridizations, 'hydrogens': self._hydrogens, 'masked': self._masked, - 'rings_sizes': self._rings_sizes, 'heteroatoms': self._heteroatoms, **super().__getstate__()} - - def __setstate__(self, state): - super().__setstate__(state) - self._atoms_stereo = state['atoms_stereo'] - self._allenes_stereo = state['allenes_stereo'] - self._cis_trans_stereo = state['cis_trans_stereo'] - self._neighbors = state['neighbors'] - self._hybridizations = state['hybridizations'] - self._hydrogens = state['hydrogens'] - self._rings_sizes = state['rings_sizes'] - self._heteroatoms = state['heteroatoms'] - self._masked = state['masked'] + return super().union(other, remap=remap, copy=copy) 
__all__ = ['QueryContainer'] diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index d1c1edd0..d65e039d 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -20,13 +20,13 @@ from CachedMethods import class_cached_property from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Type -from ...exceptions import IsNotConnectedAtom, ValenceError +from ...exceptions import ValenceError class Element(ABC): __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_xyz', - '_neighbors', '_heteroatoms', '_hybridization') + '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring') __class_cache__ = {} def __init__(self, isotope: Optional[int] = None): @@ -50,10 +50,12 @@ def __init__(self, isotope: Optional[int] = None): self._heteroatoms = 0 self._hybridization = 1 self._stereo = None + self._ring_sizes = () + self._in_ring = False def __repr__(self): - if self._isotope: - return f'{self.__class__.__name__}({self._isotope})' + if self.isotope: + return f'{self.__class__.__name__}({self.isotope})' return f'{self.__class__.__name__}()' @property @@ -201,7 +203,7 @@ def total_hydrogens(self) -> int: return self.implicit_hydrogens + self.explicit_hydrogens @property - def stereo(self): + def stereo(self) -> Optional[bool]: """ Tetrahedron or allene stereo label """ @@ -227,6 +229,20 @@ def hybridization(self): """ return self._hybridization + @property + def ring_sizes(self) -> Tuple[int, ...]: + """ + Atom rings sizes. + """ + return self._ring_sizes + + @property + def in_ring(self) -> bool: + """ + Atom in any ring. 
+ """ + return self._in_ring + def copy(self, full=False): copy = object.__new__(self.__class__) copy._isotope = self.isotope @@ -241,33 +257,13 @@ def copy(self, full=False): copy._neighbors = self.neighbors copy._heteroatoms = self.heteroatoms copy._hybridization = self.hybridization + copy._ring_sizes = self.ring_sizes + copy._in_ring = self.in_ring return copy def __copy__(self): return self.copy() - @property - def ring_sizes(self) -> Tuple[int, ...]: - """ - Atom rings sizes. - """ - try: - return self._graph().atoms_rings_sizes[self._n] - except AttributeError: - raise IsNotConnectedAtom - except KeyError: - return () - - @property - def in_ring(self) -> bool: - """ - Atom in any ring. - """ - try: - return self._n in self._graph().ring_atoms - except AttributeError: - raise IsNotConnectedAtom - @classmethod def from_symbol(cls, symbol: str) -> Type['Element']: """ diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 2cc55367..4145acf5 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -354,7 +354,7 @@ def __hash__(self): self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) def __repr__(self): - return f'{self.__class__.__name__}([{",".join(self._elements)}])' + return f'{self.__class__.__name__}([{self.atomic_symbol}])' class QueryElement(ExtendedQuery, ABC): @@ -367,8 +367,8 @@ def __init__(self, isotope: Optional[int] = None): self._isotope = isotope def __repr__(self): - if self._isotope: - return f'{self.__class__.__name__}({self._isotope})' + if self.isotope: + return f'{self.__class__.__name__}({self.isotope})' return f'{self.__class__.__name__}()' @property diff --git a/pyproject.toml b/pyproject.toml index 02c177e2..bf8fd347 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'chython' -version = '1.81' +version = '2.0' description = 'Library for processing molecules and reactions in python way' authors = ['Ramil Nugmanov '] 
license = 'LGPLv3' From dcf8c8a5708f7fb8247f3a414ad352ea139125de Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 13:29:38 +0100 Subject: [PATCH 04/68] Refactor molecule structure handling and backup procedures. Simplify the molecule structure by removing redundant charge and radical attributes and streamline the backup procedure by utilizing the copy method. Improved bond copying with added stereo support, and refined element creation from atomic data. --- chython/containers/bonds.py | 56 ++++++++++---------- chython/containers/cgr.py | 55 ++------------------ chython/containers/graph.py | 2 +- chython/containers/molecule.py | 74 +++++---------------------- chython/periodictable/base/dynamic.py | 39 +++++++++++--- chython/periodictable/base/element.py | 38 +++++++++----- chython/periodictable/base/query.py | 29 +++++++---- 7 files changed, 120 insertions(+), 173 deletions(-) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index e6014c1e..88cedd85 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -71,17 +71,14 @@ def copy(self, full=False) -> 'Bond': if full: copy._stereo = self.stereo copy._in_ring = self.in_ring + else: + copy._in_ring = False + copy._stereo = None return copy def __copy__(self): return self.copy() - @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - return cls(bond.order) - raise TypeError('Bond expected') - class DynamicBond: __slots__ = ('_order', '_p_order') @@ -146,17 +143,12 @@ def __copy__(self): return self.copy() @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - copy = object.__new__(cls) - copy._order = copy._p_order = bond.order - return copy - elif isinstance(bond, cls): - copy = object.__new__(cls) - copy._order = bond.order - copy._p_order = bond.p_order - return copy - raise TypeError('DynamicBond expected') + def from_bond(cls, bond: 'Bond') -> 'DynamicBond': + if not isinstance(bond, Bond): + raise TypeError('Bond expected') 
+ copy = object.__new__(cls) + copy._order = copy._p_order = bond.order + return copy class QueryBond: @@ -222,33 +214,37 @@ def in_ring(self) -> Optional[bool]: return self._in_ring @property - def stereo(self): + def stereo(self) -> Optional[bool]: return self._stereo def copy(self, full=False) -> 'QueryBond': copy = object.__new__(self.__class__) copy._order = self.order - copy._in_ring = self.in_ring if full: + copy._in_ring = self.in_ring copy._stereo = self.stereo + else: + copy._in_ring = copy._stereo = None return copy def __copy__(self): return self.copy() @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - copy = object.__new__(cls) - copy._order = (bond.order,) - copy._in_ring = None - return copy - elif isinstance(bond, cls): - copy = object.__new__(cls) - copy._order = bond.order + def from_bond(cls, bond: 'Bond', stereo=False, in_ring=False) -> 'QueryBond': + if not isinstance(bond, Bond): + raise TypeError('Bond expected') + copy = object.__new__(cls) + copy._order = (bond.order,) + if in_ring: copy._in_ring = bond.in_ring - return copy - raise TypeError('QueryBond or Bond expected') + else: + copy._in_ring = None + if stereo: + copy._stereo = bond.stereo + else: + copy._stereo = None + return copy __all__ = ['Bond', 'DynamicBond', 'QueryBond'] diff --git a/chython/containers/cgr.py b/chython/containers/cgr.py index 24959c80..9bdc697d 100644 --- a/chython/containers/cgr.py +++ b/chython/containers/cgr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -28,21 +28,13 @@ class CGRContainer(CGRSmiles, Morgan, Rings, Isomorphism, FingerprintsCGR): - __slots__ = ('_atoms', '_bonds', '_charges', '_radicals', '_p_charges', '_p_radicals', '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '__dict__') _atoms: Dict[int, DynamicElement] _bonds: Dict[int, Dict[int, DynamicBond]] - _charges: Dict[int, int] - _radicals: Dict[int, bool] - _p_charges: Dict[int, int] - _p_radicals: Dict[int, bool] def __init__(self): self._atoms = {} self._bonds = {} - self._charges = {} - self._radicals = {} - self._p_charges = {} - self._p_radicals = {} def bonds(self) -> Iterator[Tuple[int, int, DynamicBond]]: """ @@ -59,19 +51,8 @@ def bonds(self) -> Iterator[Tuple[int, int, DynamicBond]]: def center_atoms(self) -> Tuple[int, ...]: """ Get list of atoms of reaction center (atoms with dynamic: bonds, charges, radicals). """ - radicals = self._radicals - p_charges = self._p_charges - p_radicals = self._p_radicals - - center = set() - for n, c in self._charges.items(): - if c != p_charges[n] or radicals[n] != p_radicals[n]: - center.add(n) - - for n, m_bond in self._bonds.items(): - if any(bond.order != bond.p_order for bond in m_bond.values()): - center.add(n) - + center = {n for n, a in self._atoms.items() if a.is_dynamic} + center.update(n for n, m_bond in self._bonds.items() if any(bond.is_dynamic for bond in m_bond.values())) return tuple(center) def substructure(self, atoms) -> 'CGRContainer': @@ -82,22 +63,10 @@ def substructure(self, atoms) -> 'CGRContainer': """ atoms = set(atoms) sa = self._atoms - sc = self._charges - sr = self._radicals sb = self._bonds - spc = self._p_charges - spr = self._p_radicals sub = object.__new__(self.__class__) - sub._charges = {n: sc[n] for n in atoms} - sub._radicals = {n: sr[n] for n in atoms} - sub._p_charges = {n: spc[n] for n in atoms} - sub._p_radicals = {n: spr[n] for n in atoms} - - sub._atoms = ca = {} - for n in 
atoms: - ca[n] = atom = sa[n].copy() - atom._attach_graph(sub, n) + sub._atoms = {n: sa[n].copy() for n in atoms} sub._bonds = cb = {} for n in atoms: @@ -136,19 +105,5 @@ def get_mapping(self, other: 'CGRContainer', /, *, automorphism_filter: bool = T def __iter__(self): return iter(self._atoms) - def __getstate__(self): - return {'atoms': self._atoms, 'bonds': self._bonds, 'charges': self._charges, 'radicals': self._radicals, - 'p_charges': self._p_charges, 'p_radicals': self._p_radicals} - - def __setstate__(self, state): - self._atoms = state['atoms'] - for n, a in state['atoms'].items(): - a._attach_graph(self, n) - self._charges = state['charges'] - self._radicals = state['radicals'] - self._bonds = state['bonds'] - self._p_charges = state['p_charges'] - self._p_radicals = state['p_radicals'] - __all__ = ['CGRContainer'] diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 54470b35..fe3dc720 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -132,7 +132,7 @@ def copy(self): if m in cb: # bond partially exists. need back-connection. 
cbn[m] = cb[m][n] else: - cbn[m] = bond.copy() + cbn[m] = bond.copy(full=True) return copy def remap(self, mapping: Dict[int, int]): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index a4b5c8ef..5ccf06fc 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -443,16 +443,12 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') sa = self._atoms - sc = self._charges - sr = self._radicals sb = self._bonds bonds = [] adj = defaultdict(lambda: defaultdict(lambda: [None, None])) oa = other._atoms - oc = other._charges - or_ = other._radicals ob = other._bonds common = sa.keys() & oa.keys() @@ -460,38 +456,27 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': h = CGRContainer() ha = h._atoms hb = h._bonds - hc = h._charges - hpc = h._p_charges - hr = h._radicals - hpr = h._p_radicals for n in sa.keys() - common: # cleavage atoms - hc[n] = hpc[n] = sc[n] - hr[n] = hpr[n] = sr[n] + ha[n] = DynamicElement.from_atom(sa[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(sa[n]) - a._attach_graph(h, n) - for m, bond in sb[n].items(): if m not in ha: if m in common: # bond to common atoms is broken bond bond = DynamicBond(bond.order, None) else: - bond = DynamicBond(bond.order, bond.order) + bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) for n in oa.keys() - common: # coupling atoms - hc[n] = hpc[n] = oc[n] - hr[n] = hpr[n] = or_[n] + ha[n] = DynamicElement.from_atom(oa[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(oa[n]) - a._attach_graph(h, n) for m, bond in ob[n].items(): if m not in ha: if m in common: # bond to common atoms is formed bond bond = DynamicBond(None, bond.order) else: - bond = DynamicBond(bond.order, bond.order) + bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) for n in common: an = adj[n] @@ -502,17 +487,8 @@ def compose(self, other: 
'MoleculeContainer') -> 'CGRContainer': if m in common: an[m][1] = bond.order for n in common: - san = sa[n] - if san.atomic_number != oa[n].atomic_number or san.isotope != oa[n].isotope: - raise MappingError(f'atoms with number {n} not equal') - - hc[n] = sc[n] - hpc[n] = oc[n] - hr[n] = sr[n] - hpr[n] = or_[n] + ha[n] = DynamicElement.from_atoms(sa[n], oa[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(san) - a._attach_graph(h, n) for m, (o1, o2) in adj[n].items(): if m not in ha: @@ -926,44 +902,20 @@ def __enter__(self): """ Transaction of changes. Keep current state for restoring on errors. """ - atoms = {} - for n, atom in self._atoms.items(): - atom = atom.copy() - atoms[n] = atom - atom._attach_graph(self, n) - - bonds = {} - for n, m_bond in self._bonds.items(): - bonds[n] = cbn = {} - for m, bond in m_bond.items(): - if m in bonds: # bond partially exists. need back-connection. - cbn[m] = bonds[m][n] - else: - cbn[m] = bond = bond.copy() - bond._attach_graph(self, n, m) - - self._backup = {'atoms': atoms, 'bonds': bonds, 'parsed_mapping': self._parsed_mapping.copy(), - 'plane': self._plane.copy(), 'charges': self._charges.copy(), 'radicals': self._radicals.copy(), - 'hydrogens': self._hydrogens.copy(), 'conformers': [x.copy() for x in self._conformers], - 'atoms_stereo': self._atoms_stereo.copy(), 'allenes_stereo': self._allenes_stereo.copy(), - 'cis_trans_stereo': self._cis_trans_stereo.copy()} + self._backup = self.copy() return self def __exit__(self, exc_type, exc_val, exc_tb): if exc_type: # restore state backup = self._backup - self._atoms = backup['atoms'] - self._bonds = backup['bonds'] - self._parsed_mapping = backup['parsed_mapping'] - self._plane = backup['plane'] - self._charges = backup['charges'] - self._radicals = backup['radicals'] - self._hydrogens = backup['hydrogens'] - self._conformers = backup['conformers'] - self._atoms_stereo = backup['atoms_stereo'] - self._allenes_stereo = backup['allenes_stereo'] - self._cis_trans_stereo = 
backup['cis_trans_stereo'] + self._atoms = backup._atoms + self._bonds = backup._bonds + self._meta = backup._meta + self._name = backup._name self.flush_cache() + else: # update internal state + self.fix_labels() + self.fix_stereo() del self._backup diff --git a/chython/periodictable/base/dynamic.py b/chython/periodictable/base/dynamic.py index d0989547..c7af1a7a 100644 --- a/chython/periodictable/base/dynamic.py +++ b/chython/periodictable/base/dynamic.py @@ -17,7 +17,7 @@ # along with this program; if not, see . # from abc import ABC, abstractmethod -from typing import Type, Union, Optional +from typing import Type, Optional from .element import Element @@ -26,6 +26,8 @@ class DynamicElement(ABC): def __init__(self, isotope: Optional[int]): self._isotope = isotope + self._charge = self._p_charge = 0 + self._is_radical = self._p_is_radical = False @property def isotope(self): @@ -65,15 +67,36 @@ def from_atomic_number(cls, number: int) -> Type['DynamicElement']: return element @classmethod - def from_atom(cls, atom: Union['Element', 'DynamicElement']) -> 'DynamicElement': + def from_atom(cls, atom: 'Element') -> 'DynamicElement': """ - get DynamicElement object from Element object or copy of DynamicElement object + get DynamicElement object from Element object """ - if isinstance(atom, Element): - return cls.from_atomic_number(atom.atomic_number)(atom.isotope) - elif not isinstance(atom, DynamicElement): - raise TypeError('Element or DynamicElement expected') - return atom.copy() + if not isinstance(atom, Element): + raise TypeError('Element expected') + dynamic = object.__new__(cls.from_atomic_number(atom.atomic_number)) + dynamic._isotope = atom.isotope + dynamic._charge = dynamic._p_charge = atom.charge + dynamic._is_radical = dynamic._p_is_radical = atom.is_radical + return dynamic + + @classmethod + def from_atoms(cls, atom1: 'Element', atom2: 'Element') -> 'DynamicElement': + """ + get DynamicElement object from pair of Element objects + """ + if not 
isinstance(atom1, Element) or not isinstance(atom2, Element): + raise TypeError('Element expected') + if atom1.atomic_number != atom2.atomic_number: + raise ValueError('elements should be of the same type') + if atom1.isotope != atom2.isotope: + raise ValueError('elements should be of the same isotope') + dynamic = object.__new__(cls.from_atomic_number(atom1.atomic_number)) + dynamic._isotope = atom1.isotope + dynamic._charge = atom1.charge + dynamic._p_charge = atom2.charge + dynamic._is_radical = atom1.is_radical + dynamic._p_is_radical = atom2.is_radical + return dynamic @property def charge(self) -> int: diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index d65e039d..943d1128 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -45,11 +45,12 @@ def __init__(self, isotope: Optional[int] = None): self._is_radical = False self._x = self._y = 0 self._implicit_hydrogens = None + self._stereo = None + self._explicit_hydrogens = 0 self._neighbors = 0 self._heteroatoms = 0 self._hybridization = 1 - self._stereo = None self._ring_sizes = () self._in_ring = False @@ -243,22 +244,40 @@ def in_ring(self) -> bool: """ return self._in_ring - def copy(self, full=False): + def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': + """ + Get a copy of the Element object with attribute copy control. 
+ """ copy = object.__new__(self.__class__) copy._isotope = self.isotope copy._charge = self.charge copy._is_radical = self.is_radical + copy._x = self.x + copy._y = self.y if full: - copy._x = self.x - copy._y = self.y copy._implicit_hydrogens = self.implicit_hydrogens - copy._explicit_hydrogens = self.explicit_hydrogens copy._stereo = self.stereo + copy._explicit_hydrogens = self.explicit_hydrogens copy._neighbors = self.neighbors copy._heteroatoms = self.heteroatoms copy._hybridization = self.hybridization copy._ring_sizes = self.ring_sizes copy._in_ring = self.in_ring + else: + copy._explicit_hydrogens = 0 + copy._neighbors = 0 + copy._heteroatoms = 0 + copy._hybridization = 1 + copy._ring_sizes = () + copy._in_ring = False + if hydrogens: + copy._implicit_hydrogens = self.implicit_hydrogens + else: + copy._implicit_hydrogens = None + if stereo: + copy._stereo = self.stereo + else: + copy._stereo = None return copy def __copy__(self): @@ -290,15 +309,6 @@ def from_atomic_number(cls, number: int) -> Type['Element']: except KeyError: raise ValueError(f'Element with number "{number}" not found') - @classmethod - def from_atom(cls, atom: 'Element') -> 'Element': - """ - get Element copy - """ - if not isinstance(atom, Element): - raise TypeError('Element expected') - return atom.copy() - def __eq__(self, other): """ compare attached to molecules elements diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 4145acf5..19b5e66b 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -413,19 +413,30 @@ def from_atomic_number(cls, number: int) -> Type['QueryElement']: return element @classmethod - def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': + def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, heteroatoms=False, + hydrogens=False, ring_sizes=False) -> 'QueryElement': """ get QueryElement or AnyElement object from Element object or copy of QueryElement 
or AnyElement """ - if isinstance(atom, Element): - # transfer true atomic props - query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) - query._charge = atom.charge - query._is_radical = atom.is_radical - return query - elif not isinstance(atom, Query): + if not isinstance(atom, Element): raise TypeError('Element or Query expected') - return atom.copy() + + # transfer true atomic props + query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) + query._charge = atom.charge + query._is_radical = atom.is_radical + + if neighbors: + query._neighbors == (atom.neighbors,) + if hybridization: + query._hybridization == (atom.hybridization,) + if heteroatoms: + query._heteroatoms = (atom.heteroatoms,) + if ring_sizes: + query._ring_sizes = atom.ring_sizes + if hydrogens and atom.implicit_hydrogens is not None: + query._implicit_hydrogens = (atom.implicit_hydrogens,) + return query def copy(self, full=False): copy = super().copy(full=full) From e0fb2c5b91f01da54f0b76e8f523c9675e1ff648 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 14:54:16 +0100 Subject: [PATCH 05/68] Enhance molecule container: retain stereo info and fix labels. Updated MoleculeContainer to retain stereo information during atom/bond operations by introducing conditions in the fix_labels method. Expanded substructure method allowing customizable mark settings and improved copy methods in Bond and QueryElement to optionally retain stereo data. 
--- chython/containers/bonds.py | 7 +- chython/containers/molecule.py | 178 +++++++++++----------------- chython/periodictable/base/query.py | 11 +- 3 files changed, 79 insertions(+), 117 deletions(-) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 88cedd85..79f13cad 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -65,7 +65,7 @@ def stereo(self) -> Optional[bool]: def in_ring(self) -> bool: return self._in_ring - def copy(self, full=False) -> 'Bond': + def copy(self, full=False, stereo=False) -> 'Bond': copy = object.__new__(self.__class__) copy._order = self.order if full: @@ -73,7 +73,10 @@ def copy(self, full=False) -> 'Bond': copy._in_ring = self.in_ring else: copy._in_ring = False - copy._stereo = None + if stereo: + copy._stereo = self.stereo + else: + copy._stereo = None return copy def __copy__(self): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 5ccf06fc..40205489 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -37,20 +37,21 @@ from ..algorithms.stereo import MoleculeStereo from ..algorithms.tautomers import Tautomers from ..algorithms.x3dom import X3domMolecule -from ..exceptions import MappingError, ValenceError +from ..exceptions import ValenceError from ..periodictable import DynamicElement, Element, QueryElement, H class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, X3domMolecule): - __slots__ = ('_backup', '_meta', '_name', '_changed') + __slots__ = ('_meta', '_name', '_changed', '_backup') def __init__(self): super().__init__() self._meta = None self._name = None self._changed = None + self._backup = None @property def meta(self) -> Dict: @@ -213,7 +214,7 @@ def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=Fals self._changed = {n} else: 
self._changed.add(n) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() return n @@ -236,8 +237,9 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): else: self._changed.add(n) self._changed.add(m) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() + self.fix_stereo() def delete_atom(self, n: int, *, _skip_calculation=False): """ @@ -256,8 +258,9 @@ def delete_atom(self, n: int, *, _skip_calculation=False): self._changed = {m} else: self._changed.add(m) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() + self.fix_stereo() def delete_bond(self, n: int, m: int, *, _skip_calculation=False): """ @@ -274,8 +277,9 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): else: self._changed.add(n) self._changed.add(m) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() + self.fix_stereo() def copy(self) -> 'MoleculeContainer': copy = super().copy() @@ -293,7 +297,8 @@ def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalculate_hydrogens=True, skip_neighbors_marks=False, skip_hybridizations_marks=False, skip_hydrogens_marks=False, - skip_rings_sizes_marks=False, skip_heteroatoms_marks=False) -> \ + skip_rings_sizes_marks=False, skip_heteroatoms_marks=False, skip_in_ring_bond_marks=False, + skip_stereo_marks=False) -> \ Union['MoleculeContainer', 'QueryContainer']: """ Create substructure containing atoms from atoms list. 
@@ -310,6 +315,8 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul :param skip_hydrogens_marks: Don't set hydrogens count marks on substructured queries :param skip_rings_sizes_marks: Don't set rings_sizes marks on substructured queries :param skip_heteroatoms_marks: Don't set heteroatoms count marks + :param skip_in_ring_bond_marks: Don't set in_ring bond marks + :param skip_stereo_marks: Don't set stereo marks on substructured queries """ if not atoms: raise ValueError('empty atoms list not allowed') @@ -317,97 +324,51 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul raise ValueError('invalid atom numbers') atoms = tuple(n for n in self._atoms if n in atoms) # save original order if as_query: - atom_type = QueryElement - bond_type = QueryBond sub = object.__new__(QueryContainer) - else: - atom_type = Element - bond_type = Bond - sub = object.__new__(self.__class__) - sub._MoleculeContainer__name = sub._MoleculeContainer__meta = None - - sa = self._atoms - sb = self._bonds - sc = self._charges - sr = self._radicals - - sub._charges = {n: sc[n] for n in atoms} - sub._radicals = {n: sr[n] for n in atoms} - sub._atoms = ca = {} + lost = {n for n, a in self._atoms.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure + # atoms with fully present neighbors + not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} + + # check for full presence of cumulene chains and terminal attachments + for p in self._stereo_cumulenes.values(): + if not not_skin.issuperset(p): + not_skin.difference_update(p) + + sub._atoms = {n: QueryElement.from_atom(self._atoms[n], + neighbors=not skip_neighbors_marks, + hybridization=not skip_hybridizations_marks, + hydrogens=not skip_hydrogens_marks, + ring_sizes=not skip_rings_sizes_marks, + heteroatoms=not skip_heteroatoms_marks, + stereo=not skip_stereo_marks and n in not_skin) + for n in atoms} + sub._bonds = sb = {} + for n in atoms: + sb[n] = 
sbn = {} + for m, bond in self._bonds[n].items(): + if m in sb: # bond partially exists. need back-connection. + sbn[m] = sb[m][n] + elif m in atoms: + sbn[m] = QueryBond.from_bond(bond, + in_ring=not skip_in_ring_bond_marks, + stereo=not skip_stereo_marks and n in not_skin and m in not_skin) + return sub + + # molecule substructure + sub = object.__new__(self.__class__) + sub._name = sub._meta = sub._changed = None + sub._atoms = {n: self._atoms[n].copy(hydrogens=not recalculate_hydrogens, stereo=True) for n in atoms} + sub._bonds = sb = {} for n in atoms: - ca[n] = atom = atom_type.from_atom(sa[n]) - atom._attach_graph(sub, n) - - sub._bonds = cb = {} - for n in atoms: - cb[n] = cbn = {} - for m, bond in sb[n].items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] + sb[n] = sbn = {} + for m, bond in self._bonds[n].items(): + if m in sb: # bond partially exists. need back-connection. + sbn[m] = sb[m][n] elif m in atoms: - cbn[m] = bond = bond_type.from_bond(bond) - if not as_query: - bond._attach_graph(sub, n, m) - - if as_query: - lost = {n for n, a in sa.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure - not_skin = {n for n in atoms if lost.isdisjoint(sb[n])} - sub._atoms_stereo = {n: s for n, s in self._atoms_stereo.items() if n in not_skin} - sub._allenes_stereo = {n: s for n, s in self._allenes_stereo.items() - if not_skin.issuperset(self._stereo_allenes_paths[n]) and - not_skin.issuperset(x for x in self._stereo_allenes[n] if x)} - sub._cis_trans_stereo = {nm: s for nm, s in self._cis_trans_stereo.items() - if not_skin.issuperset(self._stereo_cis_trans_paths[nm]) and - not_skin.issuperset(x for x in self._stereo_cis_trans[nm] if x)} - - sub._masked = {n: False for n in atoms} - if skip_heteroatoms_marks: - sub._heteroatoms = {n: () for n in atoms} - else: - sha = self.heteroatoms - sub._heteroatoms = {n: (sha(n),) for n in atoms} - - if skip_hybridizations_marks: - sub._hybridizations = {n: () for n 
in atoms} - else: - sh = self.hybridization - sub._hybridizations = {n: (sh(n),) for n in atoms} - if skip_neighbors_marks: - sub._neighbors = {n: () for n in atoms} - else: - sn = self.neighbors - sub._neighbors = {n: (sn(n),) for n in atoms} - if skip_hydrogens_marks: - sub._hydrogens = {n: () for n in atoms} - else: - shg = self._hydrogens - sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms} - if skip_rings_sizes_marks: - sub._rings_sizes = {n: () for n in atoms} - else: - rs = self.atoms_rings_sizes - sub._rings_sizes = {n: rs.get(n, ()) for n in atoms} - else: - sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers] - - if recalculate_hydrogens: - sub._hydrogens = {} - for n in atoms: - sub._calc_implicit(n) - else: - hg = self._hydrogens - sub._hydrogens = {n: hg[n] for n in atoms} - - sp = self._plane - sub._plane = {n: sp[n] for n in atoms} - sub._parsed_mapping = {n: m for n, m in self._parsed_mapping.items() if n in atoms} - - # fix_stereo will repair data - sub._atoms_stereo = self._atoms_stereo.copy() - sub._allenes_stereo = self._allenes_stereo.copy() - sub._cis_trans_stereo = self._cis_trans_stereo.copy() - sub.fix_stereo() + sbn[m] = bond.copy(stereo=True) + sub.fix_labels(recalculate_hydrogens=recalculate_hydrogens) + sub.fix_stereo() return sub def augmented_substructure(self, atoms: Iterable[int], deep: int = 1, **kwargs) -> 'MoleculeContainer': @@ -442,36 +403,29 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': """ if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') - sa = self._atoms - sb = self._bonds - bonds = [] adj = defaultdict(lambda: defaultdict(lambda: [None, None])) - - oa = other._atoms - ob = other._bonds - - common = sa.keys() & oa.keys() + common = self._atoms.keys() & other._atoms.keys() h = CGRContainer() ha = h._atoms hb = h._bonds - for n in sa.keys() - common: # cleavage atoms - ha[n] = DynamicElement.from_atom(sa[n]) + for n in 
self._atoms.keys() - common: # cleavage atoms + ha[n] = DynamicElement.from_atom(self._atoms[n]) hb[n] = {} - for m, bond in sb[n].items(): + for m, bond in self._bonds[n].items(): if m not in ha: if m in common: # bond to common atoms is broken bond bond = DynamicBond(bond.order, None) else: bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) - for n in oa.keys() - common: # coupling atoms - ha[n] = DynamicElement.from_atom(oa[n]) + for n in other._atoms.keys() - common: # coupling atoms + ha[n] = DynamicElement.from_atom(other._atoms[n]) hb[n] = {} - for m, bond in ob[n].items(): + for m, bond in other._bonds[n].items(): if m not in ha: if m in common: # bond to common atoms is formed bond bond = DynamicBond(None, bond.order) @@ -480,14 +434,14 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': bonds.append((n, m, bond)) for n in common: an = adj[n] - for m, bond in sb[n].items(): + for m, bond in self._bonds[n].items(): if m in common: an[m][0] = bond.order - for m, bond in ob[n].items(): + for m, bond in other._bonds[n].items(): if m in common: an[m][1] = bond.order for n in common: - ha[n] = DynamicElement.from_atoms(sa[n], oa[n]) + ha[n] = DynamicElement.from_atoms(self._atoms[n], other._atoms[n]) hb[n] = {} for m, (o1, o2) in adj[n].items(): @@ -916,7 +870,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): else: # update internal state self.fix_labels() self.fix_stereo() - del self._backup + self._backup = None # drop backup __all__ = ['MoleculeContainer'] diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 19b5e66b..fc26c962 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -107,6 +107,9 @@ def copy(self, full=False): if full: copy._masked = self.masked copy._stereo = self.stereo + else: + copy._masked = False + copy._stereo = None return copy def __copy__(self): @@ -414,7 +417,7 @@ def from_atomic_number(cls, number: int) -> 
Type['QueryElement']: @classmethod def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, heteroatoms=False, - hydrogens=False, ring_sizes=False) -> 'QueryElement': + hydrogens=False, ring_sizes=False, stereo=False) -> 'QueryElement': """ get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement """ @@ -427,15 +430,17 @@ def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, hetero query._is_radical = atom.is_radical if neighbors: - query._neighbors == (atom.neighbors,) + query._neighbors = (atom.neighbors,) if hybridization: - query._hybridization == (atom.hybridization,) + query._hybridization = (atom.hybridization,) if heteroatoms: query._heteroatoms = (atom.heteroatoms,) if ring_sizes: query._ring_sizes = atom.ring_sizes if hydrogens and atom.implicit_hydrogens is not None: query._implicit_hydrogens = (atom.implicit_hydrogens,) + if stereo: + query._stereo = atom.stereo return query def copy(self, full=False): From fb08fdc75d6f287bfade1b0ce35386e67a3be236 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 16:01:19 +0100 Subject: [PATCH 06/68] Refactor stereo and chemical attributes handling Centralize chemical attributes like charge and radicals within the `atom` object. Simplify stereo data management by directly setting stereochemistry on atom and bond objects and remove unnecessary lookups. Add `ExtendedQuery` to public API and streamline related imports. 
--- chython/algorithms/isomorphism.py | 2 +- chython/algorithms/smiles.py | 132 +++++++++++-------------- chython/algorithms/stereo/graph.py | 14 +-- chython/periodictable/base/__init__.py | 3 +- chython/periodictable/base/query.py | 31 +++--- 5 files changed, 85 insertions(+), 97 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 76791e70..e2d95da3 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -22,7 +22,7 @@ from itertools import permutations from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product -from ..periodictable.element import Element, Query, AnyElement, AnyMetal, ListElement +from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement if TYPE_CHECKING: diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index e4b8dfdd..412c76e0 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -26,6 +26,7 @@ from itertools import product from random import random from typing import Callable, Optional, Tuple, TYPE_CHECKING, Union +from ..periodictable import ExtendedQuery, QueryElement if TYPE_CHECKING: @@ -382,15 +383,11 @@ def _smiles_order(self: 'MoleculeContainer', stereo=True) -> Callable: def _format_cxsmiles(self: 'MoleculeContainer', order): if self.is_radical: - radical = self._radicals - return f'|^1:{",".join(str(n) for n, m in enumerate(order) if radical[m])}|' + return f'|^1:{",".join(str(n) for n, m in enumerate(order) if self._atoms[m].is_radical)}|' return def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - ih = self._hydrogens[n] - hyb = self.hybridization(n) smi = ['', # [ str(atom.isotope) if atom.isotope else '', # isotope @@ -401,50 +398,51 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): f':{n}' if kwargs.get('mapping', False) else '', # 
mapping ''] # ] - if kwargs.get('stereo', True): - if n in self._atoms_stereo: - if ih and next(x for x in adjacency) == n: # first atom in smiles has reversed chiral mark - smi[3] = '@@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@' - else: - smi[3] = '@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@@' - elif n in self._allenes_stereo: + if atom.stereo is not None and kwargs.get('stereo', True): + # allene + if n in self._stereo_allenes_terminals: t1, t2 = self._stereo_allenes_terminals[n] env = self._stereo_allenes[n] n1 = next(x for x in adjacency[t1] if x in env) n2 = next(x for x in adjacency[t2] if x in env) smi[3] = '@' if self._translate_allene_sign(n, n1, n2) else '@@' - elif charge and kwargs.get('charges', True): - smi[5] = charge_str[charge] - elif charge and kwargs.get('charges', True): - smi[5] = charge_str[charge] + # tetrahedron + elif atom.implicit_hydrogens and next(x for x in adjacency) == n: + # first atom in smiles has reversed chiral mark + smi[3] = '@@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@' + else: + smi[3] = '@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@@' + + if atom.charge and kwargs.get('charges', True): + smi[5] = charge_str[atom.charge] - if any(smi) or atom.atomic_symbol not in organic_set or self._radicals[n] or kwargs.get('hydrogens', False): + if any(smi) or atom.atomic_symbol not in organic_set or atom.is_radical or kwargs.get('hydrogens', False): smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' - elif ih: - smi[4] = f'H{ih}' - elif hyb == 4 and ih and atom.atomic_number in (5, 7, 15): # pyrrole + elif atom.implicit_hydrogens: + smi[4] = f'H{atom.implicit_hydrogens}' + elif atom.hybridization == 4 and atom.implicit_hydrogens and atom.atomic_number in (5, 7, 15): # pyrrole smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' else: - smi[4] = f'H{ih}' - elif not ih and atom.atomic_number 
in (5, 6, 15, 16) and not self.not_special_connectivity[n]: + smi[4] = f'H{atom.implicit_hydrogens}' + elif not atom.implicit_hydrogens and atom.atomic_number in (5, 6, 15, 16) and not self.not_special_connectivity[n]: # elemental B, C, P, S smi[0] = '[' smi[-1] = ']' - elif ih and atom.atomic_number == 15 and hyb != 1: + elif atom.implicit_hydrogens and atom.atomic_number == 15 and atom.hybridization != 1: smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' else: - smi[4] = f'H{ih}' + smi[4] = f'H{atom.implicit_hydrogens}' - if kwargs.get('aromatic', True) and hyb == 4: + if kwargs.get('aromatic', True) and atom.hybridization == 4: smi[2] = atom.atomic_symbol.lower() else: smi[2] = atom.atomic_symbol @@ -453,14 +451,13 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' - bonds = self._bonds - order = bonds[n][m].order + order = self._bonds[n][m].order if order == 4: if kwargs.get('aromatic', True): return '' return ':' elif order == 1: # cis-trans /\ - if kwargs.get('aromatic', True) and self.hybridization(n) == self.hybridization(m) == 4: + if kwargs.get('aromatic', True) and self._atoms[n].hybridization == self._atoms[m].hybridization == 4: return '-' if kwargs.get('stereo', True): if 'cache' in adjacency: @@ -531,19 +528,15 @@ class CGRSmiles(Smiles): def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - is_radical = self._radicals[n] - p_charge = self._p_charges[n] - p_is_radical = self._p_radicals[n] if atom.isotope: smi = [str(atom.isotope), atom.atomic_symbol] else: smi = [atom.atomic_symbol] - if charge or p_charge: - smi.append(dyn_charge_str[(charge, p_charge)]) - if is_radical or p_is_radical: - smi.append(dyn_radical_str[(is_radical, p_is_radical)]) + if atom.charge or atom.p_charge: + 
smi.append(dyn_charge_str[(atom.charge, atom.p_charge)]) + if atom.is_radical or atom.p_is_radical: + smi.append(dyn_radical_str[(atom.is_radical, atom.p_is_radical)]) if len(smi) != 1 or atom.atomic_symbol not in organic_set: smi.insert(0, '[') @@ -559,22 +552,19 @@ class QuerySmiles(Smiles): __slots__ = () def _format_cxsmiles(self: 'QueryContainer', order): - hybridization = self._hybridizations - heteroatoms = self._heteroatoms - masked = self._masked - radical = self._radicals - hh = ['atomProp'] cx = [] - if any(radical.values()): - cx.append(f'^1:{",".join(str(n) for n, m in enumerate(order) if radical[m])}') + rad = [str(n) for n, m in enumerate(order) if isinstance(a:=self._atoms[m], ExtendedQuery) and a.is_radical] + if rad: + cx.append('^1:' + ','.join(rad)) for n, m in enumerate(order): - if len(hb := hybridization[m]) > 1 or (hb and hb[0] != 4): - hh.append(f'{n}.hyb.{"".join(hybridization_str[x] for x in hb)}') - if ha := heteroatoms[m]: - hh.append(f'{n}.het.{"".join(str(x) for x in ha)}') - if masked[m]: + atom = self._atoms[m] + if len(hb := atom.hybridization) > 1 or (hb and hb[0] != 4): + hh.append(f'{n}.hyb.' + ''.join(hybridization_str[x] for x in hb)) + if isinstance(atom, ExtendedQuery) and (ha := atom.heteroatoms): + hh.append(f'{n}.het.' + ''.join(str(x) for x in ha)) + if atom.masked: hh.append(f'{n}.msk.1') if len(hh) > 1: cx.append(':'.join(hh)) @@ -583,42 +573,36 @@ def _format_cxsmiles(self: 'QueryContainer', order): def _format_atom(self: 'QueryContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - hybridization = self._hybridizations[n] - neighbors = self._neighbors[n] - hydrogens = self._hydrogens[n] - rings = self._rings_sizes[n] - - if atom.isotope: + if isinstance(atom, QueryElement) and atom.isotope: smi = ['[', str(atom.isotope), atom.atomic_symbol] else: smi = ['[', atom.atomic_symbol] - if n in self._atoms_stereo: # mark atom as chiral. 
it's too difficult to set correct sign - smi.append(';@?') - if n in self._allenes_stereo: - smi.append(';@?') + if isinstance(atom, ExtendedQuery): + if atom.stereo is not None: + # mark atom as chiral. it's too difficult to set correct sign + smi.append(';@?') - if charge: - smi.append(';') - smi.append(charge_str[charge]) + if atom.charge: + smi.append(';') + smi.append(charge_str[atom.charge]) - if hydrogens: # h implicit-H-count implicit hydrogens - smi.append(';') - smi.append(','.join(f'h{x}' for x in hydrogens)) + if atom.implicit_hydrogens: # h implicit-H-count implicit hydrogens + smi.append(';') + smi.append(','.join(f'h{x}' for x in atom.implicit_hydrogens)) - if neighbors: # D degree explicit connections + if atom.neighbors: # D degree explicit connections smi.append(';') - smi.append(','.join(f'D{x}' for x in neighbors)) + smi.append(','.join(f'D{x}' for x in atom.neighbors)) - if rings: + if isinstance(atom, ExtendedQuery) and atom.ring_sizes: smi.append(';') - if rings[0]: - smi.append(','.join(f'r{x}' for x in rings)) + if atom.ring_sizes[0]: + smi.append(','.join(f'r{x}' for x in atom.ring_sizes)) else: smi.append('!R') - if len(hybridization) == 1 and hybridization[0] == 4: # only aromatic. other marks in cx extension + if len(atom.hybridization) == 1 and atom.hybridization[0] == 4: # only aromatic. 
other marks in cx extension smi.append(';a') smi.append(']') diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index 01dbd26e..6fe91b76 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -72,12 +72,10 @@ def tetrahedrons(self: 'Container') -> Tuple[int, ...]: """ atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals tetra = [] for n, atom in atoms.items(): - if atom.atomic_number == 6 and not charges[n] and not radicals[n]: + if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: env = bonds[n] if all(int(x) == 1 for x in env.values()): if sum(int(x) for x in env.values()) > 4: @@ -89,9 +87,11 @@ def clean_stereo(self: 'Container'): """ Remove stereo data. """ - self._atoms_stereo.clear() - self._allenes_stereo.clear() - self._cis_trans_stereo.clear() + for a in self._atoms.values(): + a._stereo = None + for _, bs in self._bonds: + for b in bs.values(): + b._stereo = None # flush twice, but it should be still faster self.flush_cache() def get_mapping(self: 'Container', other: 'Container', **kwargs): @@ -156,7 +156,7 @@ def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): :param s: if None, use existing sign else translate given to molecule """ if s is None: - s = self._atoms_stereo[n] + s = self._atoms[n].stereo order = self._stereo_tetrahedrons[n] if len(order) == 3: diff --git a/chython/periodictable/base/__init__.py b/chython/periodictable/base/__init__.py index f8ca87e8..75806828 100644 --- a/chython/periodictable/base/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -21,4 +21,5 @@ from .query import * -__all__ = ['Element', 'DynamicElement', 'Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'Query', 'ExtendedQuery', + 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/base/query.py 
b/chython/periodictable/base/query.py index fc26c962..325c0947 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -47,13 +47,17 @@ def _validate(value, prop): class Query(ABC): - __slots__ = ('_neighbors', '_hybridization', '_masked', '_stereo') + __slots__ = ('_neighbors', '_hybridization', '_masked') def __init__(self): self._neighbors = () self._hybridization = () self._masked = False - self._stereo = None + + @property + @abstractmethod + def atomic_symbol(self) -> str: + ... @property def neighbors(self) -> Tuple[int, ...]: @@ -96,20 +100,12 @@ def masked(self, value): raise TypeError('masked should be bool') self._masked = value - @property - def stereo(self): - return self._stereo - def copy(self, full=False): copy = object.__new__(self.__class__) copy._neighbors = self.neighbors copy._hybridization = self.hybridization - if full: - copy._masked = self.masked - copy._stereo = self.stereo - else: - copy._masked = False - copy._stereo = None + + copy._masked = self.masked if full else False return copy def __copy__(self): @@ -120,7 +116,7 @@ def __repr__(self): class ExtendedQuery(Query, ABC): - __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens') + __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens', '_stereo') def __init__(self): super().__init__() @@ -129,6 +125,7 @@ def __init__(self): self._heteroatoms = () self._ring_sizes = () self._implicit_hydrogens = () + self._stereo = None @property def charge(self) -> int: @@ -200,6 +197,10 @@ def ring_sizes(self, value): else: raise TypeError('rings should be int or list or tuple of ints') + @property + def stereo(self): + return self._stereo + def copy(self, full=False): copy = super().copy(full=full) copy._charge = self.charge @@ -207,6 +208,8 @@ def copy(self, full=False): copy._heteroatoms = self.heteroatoms copy._implicit_hydrogens = self.implicit_hydrogens copy._ring_sizes = 
self.ring_sizes + + copy._stereo = self.stereo if full else None return copy @@ -499,4 +502,4 @@ def __hash__(self): self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) -__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Query', 'ExtendedQuery', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] From 04ef91e9f42219c8efaca4fecb8c972b25c2e525 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 16:16:41 +0100 Subject: [PATCH 07/68] kekule adapted --- chython/algorithms/aromatics/kekule.py | 141 ++++++++++++------------- 1 file changed, 67 insertions(+), 74 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index ef9834e9..de51744b 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -46,7 +46,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool bonds = self._bonds atoms = set() for n, m, b in kekule: - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b atoms.add(n) atoms.add(m) for n in atoms: @@ -65,7 +65,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): bonds = copy._bonds atoms = set() for n, m, b in form: - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b atoms.add(n) atoms.add(m) for n in atoms: @@ -73,8 +73,8 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): yield copy def __fix_rings(self: 'MoleculeContainer'): + atoms = self._atoms bonds = self._bonds - charges = self._charges seen = set() for q, af, bf, mm in rules: for mapping in q.get_mapping(self, automorphism_filter=False): @@ -85,11 +85,11 @@ def __fix_rings(self: 'MoleculeContainer'): for n, c in af.items(): n = mapping[n] - charges[n] = c + atoms[n]._charge = c for n, m, b in bf: n = mapping[n] m = mapping[m] - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b if seen: self.flush_cache() return True @@ -97,11 +97,7 
@@ def __fix_rings(self: 'MoleculeContainer'): def __prepare_rings(self: 'MoleculeContainer'): atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - hydrogens = self._hydrogens - neighbors = self.neighbors rings = defaultdict(list) # aromatic skeleton pyrroles = set() @@ -168,133 +164,130 @@ def __prepare_rings(self: 'MoleculeContainer'): if any(len(rings[n]) != 2 for n in double_bonded): # double bonded never condensed raise InvalidAromaticRing('quinone valence error') for n in double_bonded: - if atoms[n].atomic_number == 7: - if charges[n] != 1: + atom = atoms[n] + if atom.atomic_number == 7: + if atom.charge != 1: raise InvalidAromaticRing('quinone should be charged N atom') - elif atoms[n].atomic_number not in (6, 15, 16, 33, 34, 52) or charges[n]: + elif atom.atomic_number not in (6, 15, 16, 33, 34, 52) or atom.charge: raise InvalidAromaticRing('quinone should be neutral S, Se, Te, C, P, As atom') for n in rings: - an = atoms[n].atomic_number - ac = charges[n] - ab = neighbors(n) - if an == 6: # carbon - if ac == 0: - if ab not in (2, 3): + atom = atoms[n] + if atom.atomic_number == 6: # carbon + if atom.charge == 0: + if atom.neighbors not in (2, 3): raise InvalidAromaticRing - elif ac in (-1, 1): - if radicals[n]: - if ab == 2: + elif atom.charge in (-1, 1): + if atom.is_radical: + if atom.neighbors == 2: double_bonded.add(n) else: raise InvalidAromaticRing - elif ab == 3: + elif atom.neighbors == 3: double_bonded.add(n) - elif ab == 2: # benzene (an|cat)ion or pyrrole + elif atom.neighbors == 2: # benzene (an|cat)ion or pyrrole pyrroles.add(n) else: raise InvalidAromaticRing else: raise InvalidAromaticRing - elif an in (7, 15, 33): - if ac == 0: # pyrrole or pyridine. include radical pyrrole - if radicals[n]: - if ab != 2: # only pyrrole radical + elif atom.atomic_number in (7, 15, 33): + if atom.charge == 0: # pyrrole or pyridine. 
include radical pyrrole + if atom.is_radical: + if atom.neighbors != 2: # only pyrrole radical raise InvalidAromaticRing double_bonded.add(n) - elif ab == 3: - if an == 7: # pyrrole only possible + elif atom.neighbors == 3: + if atom.atomic_number == 7: # pyrrole only possible double_bonded.add(n) else: # P(III) or P(V)H pyrroles.add(n) - elif ab == 2: - ah = hydrogens[n] - if ah is None: # pyrrole or pyridine + elif atom.neighbors == 2: + if atom.implicit_hydrogens is None: # pyrrole or pyridine pyrroles.add(n) - elif ah == 1: # only pyrrole + elif atom.implicit_hydrogens == 1: # only pyrrole double_bonded.add(n) - elif ah: # too many hydrogens for aromatic rings + elif atom.implicit_hydrogens: # too many hydrogens for aromatic rings raise InvalidAromaticRing - elif ab != 4 or an not in (15, 33): # P(V) in ring [P;a](-R1)-R2 + elif atom.neighbors != 4 or atom.atomic_number not in (15, 33): # P(V) in ring [P;a](-R1)-R2 raise InvalidAromaticRing - elif ac == -1: # pyrrole only - if ab != 2 or radicals[n]: + elif atom.charge == -1: # pyrrole only + if atom.neighbors != 2 or atom.is_radical: raise InvalidAromaticRing double_bonded.add(n) - elif ac != 1: + elif atom.charge != 1: raise InvalidAromaticRing - elif radicals[n]: - if ab != 2: # not cation-radical pyridine + elif atom.is_radical: + if atom.neighbors != 2: # not cation-radical pyridine raise InvalidAromaticRing - elif ab == 2: # pyrrole cation or protonated pyridine + elif atom.neighbors == 2: # pyrrole cation or protonated pyridine pyrroles.add(n) - elif ab != 3: # not pyridine oxyde + elif atom.neighbors != 3: # not pyridine oxyde raise InvalidAromaticRing - elif an == 8: # furan - if ab == 2: - if ac == 0: - if radicals[n]: + elif atom.atomic_number == 8: # furan + if atom.neighbors == 2: + if atom.charge == 0: + if atom.is_radical: raise InvalidAromaticRing('radical oxygen') double_bonded.add(n) - elif ac == 1: - if radicals[n]: # furan cation-radical + elif atom.charge == 1: + if atom.is_radical: # furan 
cation-radical double_bonded.add(n) # pyrylium else: raise InvalidAromaticRing('invalid oxygen charge') else: raise InvalidAromaticRing('Triple-bonded oxygen') - elif an in (16, 34, 52): # thiophene + elif atom.atomic_number in (16, 34, 52): # thiophene if n not in double_bonded: # not sulphoxyde nor sulphone - if ab == 2: - if radicals[n]: - if ac == 1: + if atom.neighbors == 2: + if atom.is_radical: + if atom.charge == 1: double_bonded.add(n) else: raise InvalidAromaticRing('S, Se, Te cation-radical expected') - if ac == 0: + if atom.charge == 0: double_bonded.add(n) - elif ac != 1: + elif atom.charge != 1: raise InvalidAromaticRing('S, Se, Te cation in benzene like ring expected') - elif ab == 3: - if radicals[n]: - if ac: + elif atom.neighbors == 3: + if atom.is_radical: + if atom.charge: raise InvalidAromaticRing('S, Se, Te ion-radical ring') double_bonded.add(n) - elif ac == 1: + elif atom.charge == 1: double_bonded.add(n) - elif ac: + elif atom.charge: raise InvalidAromaticRing('S, Se, Te invalid charge ring') else: raise InvalidAromaticRing('S, Se, Te hypervalent ring') - elif an == 5: # boron - if ac == 0: - if ab == 2: - if radicals[n]: # C=1O[B]OC=1 + elif atom.atomic_number == 5: # boron + if atom.charge == 0: + if atom.neighbors == 2: + if atom.is_radical: # C=1O[B]OC=1 double_bonded.add(n) else: - ah = hydrogens[n] - if ah is None: # b1ccccc1, C=1OBOC=1 or B1C=CC=N1 + if atom.implicit_hydrogens is None: # b1ccccc1, C=1OBOC=1 or B1C=CC=N1 pyrroles.add(n) - elif ah == 1: # C=1O[BH]OC=1 or [BH]1C=CC=N1 + elif atom.implicit_hydrogens == 1: # C=1O[BH]OC=1 or [BH]1C=CC=N1 double_bonded.add(n) - elif ah: + elif atom.implicit_hydrogens: raise InvalidAromaticRing - elif not radicals[n]: + elif not atom.is_radical: double_bonded.add(n) else: raise InvalidAromaticRing - elif ac == 1: - if ab == 2 and not radicals[n]: + elif atom.charge == 1: + if atom.neighbors == 2 and not atom.is_radical: double_bonded.add(n) else: raise InvalidAromaticRing - elif ac == -1: - 
if ab == 2: - if not radicals[n]: # C=1O[B-]OC=1 or [bH-]1ccccc1 + elif atom.charge == -1: + if atom.neighbors == 2: + if not atom.is_radical: # C=1O[B-]OC=1 or [bH-]1ccccc1 pyrroles.add(n) # anion-radical is benzene like - elif radicals[n]: # C=1O[B-*](R)OC=1 + elif atom.is_radical: # C=1O[B-*](R)OC=1 double_bonded.add(n) else: pyrroles.add(n) From e5a2eaede1c3c138ea037d68dce013d7a5e403a7 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 2 Nov 2024 14:11:12 +0100 Subject: [PATCH 08/68] molecule constructor refactored --- chython/containers/molecule.py | 58 +---------- chython/files/_convert.py | 137 ++++++++++++++++++++------ chython/files/daylight/smiles.py | 108 +++----------------- chython/files/daylight/tokenize.py | 15 ++- chython/periodictable/base/element.py | 18 ++-- 5 files changed, 142 insertions(+), 194 deletions(-) diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 40205489..09fa158a 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -44,7 +44,7 @@ class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, X3domMolecule): - __slots__ = ('_meta', '_name', '_changed', '_backup') + __slots__ = ('_meta', '_name', '_conformers', '_changed', '_backup') def __init__(self): super().__init__() @@ -93,52 +93,6 @@ def environment(self, atom: int, include_bond: bool = True, include_atom: bool = return tuple(self._bonds[atom].items()) return tuple(self._bonds[atom]) - def neighbors(self, n: int) -> int: - """number of neighbors atoms excluding any-bonded""" - return self._atoms[n].neighbors - - @cached_args_method - def hybridization(self, n: int) -> int: - """ - Atom hybridization. 
- - 1 - if atom has zero or only single bonded neighbors, 2 - if has only one double bonded neighbor and any amount - of single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or - two and more double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. - """ - return self._atoms[n].hybridization - - @cached_args_method - def heteroatoms(self, n: int) -> int: - """ - Number of neighbored heteroatoms (not carbon or hydrogen) except any-bond connected. - """ - return self._atoms[n].heteroatoms - - def implicit_hydrogens(self, n: int) -> Optional[int]: - """ - Number of implicit hydrogen atoms connected to atom. - - Returns None if count are ambiguous. - """ - return self._atoms[n].implicit_hydrogens - - def explicit_hydrogens(self, n: int) -> int: - """ - Number of explicit hydrogen atoms connected to atom. - - Take into account any type of bonds with hydrogen atoms. - """ - return self._atoms[n].explicit_hydrogens - - def total_hydrogens(self, n: int) -> int: - """ - Number of hydrogen atoms connected to atom. - - Take into account any type of bonds with hydrogen atoms. 
- """ - return self._atoms[n].total_hydrogens - @cached_args_method def adjacency_matrix(self, set_bonds=False, /): """ @@ -743,8 +697,7 @@ def _calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules """ - atoms = self._atoms - atom = atoms[n] + atom = self._atoms[n] if atom.atomic_number == 1: # hydrogen nether has implicit H atom._implicit_hydrogens = 0 return @@ -762,7 +715,7 @@ def _calc_implicit(self, n: int): return elif order != 8: # any bond used for complexes explicit_sum += order - explicit_dict[(order, atoms[m].atomic_number)] += 1 + explicit_dict[(order, self._atoms[m].atomic_number)] += 1 if aroma == 2: if explicit_sum == 0: # H-Ar @@ -794,8 +747,7 @@ def _calc_implicit(self, n: int): atom._implicit_hydrogens = None # rule not found def _check_implicit(self, n: int, h: int) -> bool: - atoms = self._atoms - atom = atoms[n] + atom = self._atoms[n] if atom.atomic_number == 1: # hydrogen nether has implicit H return h == 0 @@ -808,7 +760,7 @@ def _check_implicit(self, n: int, h: int) -> bool: return False elif order != 8: # any bond used for complexes explicit_sum += order - explicit_dict[(order, atoms[m].atomic_number)] += 1 + explicit_dict[(order, self._atoms[m].atomic_number)] += 1 try: rules = atom.valence_rules(explicit_sum) diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 2de1ff2b..819389e1 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023, 2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -22,31 +22,27 @@ from ..periodictable import Element -def create_molecule(data, *, skip_calc_implicit=False, ignore_bad_isotopes=False, _cls=MoleculeContainer): - g = object.__new__(_cls) - pm = {} - atoms = {} - plane = {} - charges = {} - radicals = {} - bonds = {} +def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False, + keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, + ignore_carbon_radicals=False, _cls=MoleculeContainer): + g = _cls() + atoms = g._atoms + bonds = g._bonds mapping = data['mapping'] for n, atom in enumerate(data['atoms']): + if abs(atom['charge']) > 4: + raise ValueError('formal charge should be in range [-4, 4]') n = mapping[n] - e = Element.from_symbol(atom['element']) + e = Element.from_symbol(atom.pop('element')) try: - atoms[n] = e(atom['isotope']) + atoms[n] = e(**atom) except ValueError: if not ignore_bad_isotopes: raise - atoms[n] = e() # reset isotope mark on errors. + del atom['isotope'] # reset isotope mark on errors. 
+ atoms[n] = e(**atom) bonds[n] = {} - if (charge := atom['charge']) > 4 or charge < -4: - raise ValueError('formal charge should be in range [-4, 4]') - charges[n] = charge - radicals[n] = atom['is_radical'] - plane[n] = (atom['x'], atom['y']) - pm[n] = atom['mapping'] + for n, m, b in data['bonds']: n, m = mapping[n], mapping[m] if n == m: @@ -57,26 +53,108 @@ def create_molecule(data, *, skip_calc_implicit=False, ignore_bad_isotopes=False raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) if any(a['z'] for a in data['atoms']): - conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] - else: - conformers = [] + # store conformer + g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] if data['log']: # store log to the meta if data['meta'] is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] + g._meta = data['meta'] - g.__setstate__({'atoms': atoms, 'bonds': bonds, 'meta': data['meta'], 'plane': plane, 'parsed_mapping': pm, - 'charges': charges, 'radicals': radicals, 'name': data['title'], 'conformers': conformers, - 'atoms_stereo': {}, 'allenes_stereo': {}, 'cis_trans_stereo': {}, 'hydrogens': {}}) - if not skip_calc_implicit: - for n in atoms: + if skip_calc_implicit: # don't calc Hs. e.g. INCHI + return g + + implicit_mismatch = {} + radicalized = [] + # precalculate Hs + for n, a in atoms.items(): + if a.implicit_hydrogens is None: + # let's try to calculate. in case of errors just keep as is. radicals in smiles should be in [brackets], + # thus has implicit Hs value g._calc_implicit(n) + elif keep_implicit: + # keep given Hs count as is + continue + else: # recheck given Hs count + h = a.implicit_hydrogens # parsed Hs + g._calc_implicit(n) # recalculate + if a.implicit_hydrogens is None: # atom has invalid valence or aromatic ring. + if a.hybridization == 4: + # this is aromatic ring. just restore given H count. 
+ a._implicit_hydrogens = h + # rare H0 case + if (not keep_radicals and not ignore_aromatic_radicals + and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and sum(b.order != 8 for b in bonds[n].values()) == 2): + # c[c]c - aromatic B,C,N,P radical + a._is_radical = True + radicalized.append(n) + elif not keep_radicals and not a.is_radical: # CXSMILES radical not set. + # SMILES doesn't code radicals. so, let's try to guess. + a._is_radical = True + if g._check_implicit(n, h): # radical form is valid + radicalized.append(n) + a._implicit_hydrogens = h + elif ignore: # radical state also has errors. + a._is_radical = False # reset radical state + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif h != a.implicit_hydrogens: # H count mismatch. + if a.hybridization == 4: + if (not keep_radicals + and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and sum(b.order != 8 for b in bonds[n].values()) == 2): + # c[c]c - aromatic B,C,N,P radical + a._implicit_hydrogens = 0 + a._is_radical = True + radicalized.append(n) + elif ignore: + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif g._check_implicit(n, h): # set another possible implicit state. probably Al, P + a._implicit_hydrogens = h + elif not keep_radicals and not a.is_radical: # CXSMILES radical is not set. try radical form + a._is_radical = True + if g._check_implicit(n, h): + a._implicit_hydrogens = h + radicalized.append(n) + # radical state also has errors. 
+ elif ignore: + a._is_radical = False # reset radical state + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif ignore: # just ignore it + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + + if ignore_carbon_radicals: + for n in radicalized: + a = atoms[n] + if a.atomic_number == 6: + a._is_radical = False + a._implicit_hydrogens += 1 + data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') + elif radicalized: + g.meta['chython_radicalized_atoms'] = radicalized + if data['log'] and 'chython_parsing_log' not in g.meta: + g.meta['chython_parsing_log'] = data['log'] + if implicit_mismatch: + g.meta['chython_implicit_mismatch'] = implicit_mismatch return g def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_isotopes=False, - _r_cls=ReactionContainer, _m_cls=MoleculeContainer): + keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, + ignore_carbon_radicals=False, _r_cls=ReactionContainer, _m_cls=MoleculeContainer): rc, pr, rg = [], [], [] for ms, pms, gr in ((rc, data['reactants'], 'reactant'), (pr, data['products'], 'products'), @@ -85,7 +163,10 @@ def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_i for n, m in enumerate(pms): try: ms.append(create_molecule(m, skip_calc_implicit=skip_calc_implicit, - ignore_bad_isotopes=ignore_bad_isotopes, _cls=_m_cls)) + ignore_bad_isotopes=ignore_bad_isotopes, keep_implicit=keep_implicit, + keep_radicals=keep_radicals, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + ignore_carbon_radicals=ignore_carbon_radicals, _cls=_m_cls)) except ValueError as e: if not ignore: raise diff --git 
a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 2271a052..d491c866 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -143,11 +143,12 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: atom_map[x]['is_radical'] = True postprocess_parsed_reaction(record, remap=remap, ignore=ignore) - rxn = create_reaction(record, ignore_bad_isotopes=ignore_bad_isotopes, _r_cls=_r_cls, _m_cls=_m_cls) + rxn = create_reaction(record, ignore_bad_isotopes=ignore_bad_isotopes, keep_radicals=False, + ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + _r_cls=_r_cls, _m_cls=_m_cls) for mol, tmp in zip(rxn.molecules(), chain(record['reactants'], record['reagents'], record['products'])): - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, - ignore_aromatic_radicals=ignore_aromatic_radicals) + postprocess_molecule(mol, tmp, ignore_stereo=ignore_stereo) return rxn else: record = parser(smiles_tokenize(smi), not ignore) @@ -156,104 +157,17 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: record['log'].extend(log) postprocess_parsed_molecule(record, remap=remap, ignore=ignore) - mol = create_molecule(record, ignore_bad_isotopes=ignore_bad_isotopes, _cls=_m_cls) - postprocess_molecule(mol, record, ignore=ignore, ignore_stereo=ignore_stereo, - ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, - ignore_aromatic_radicals=ignore_aromatic_radicals) + mol = create_molecule(record, ignore_bad_isotopes=ignore_bad_isotopes, keep_radicals=False, + ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + _cls=_m_cls) + postprocess_molecule(mol, record, 
ignore_stereo=ignore_stereo) return mol -def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, ignore_carbon_radicals=False, - keep_implicit=False, ignore_aromatic_radicals=True): +def postprocess_molecule(molecule, data, *, ignore_stereo=False): mapping = data['mapping'] - atoms = molecule._atoms - bonds = molecule._bonds - charges = molecule._charges - hydrogens = molecule._hydrogens - radicals = molecule._radicals - hyb = molecule.hybridization - radicalized = [] - - implicit_mismatch = {} - if 'chython_parsing_log' in molecule.meta: - log = molecule.meta['chython_parsing_log'] - else: - log = [] - - for n, a in enumerate(data['atoms']): - h = a['hydrogen'] - if h is None: # simple atom token - continue - # bracket token should always contain implicit hydrogens count. - n = mapping[n] - if keep_implicit: # override any calculated hydrogens count. - hydrogens[n] = h - elif (hc := hydrogens[n]) is None: # atom has invalid valence or aromatic ring. - if hyb(n) == 4: # this is aromatic rings. just store given H count. - hydrogens[n] = h - # rare H0 case - if (not ignore_aromatic_radicals and not h and not charges[n] and not radicals[n] and - atoms[n].atomic_number in (5, 6, 7, 15) and sum(b.order != 8 for b in bonds[n].values()) == 2): - # c[c]c - aromatic B,C,N,P radical - radicals[n] = True - radicalized.append(n) - elif not radicals[n]: # CXSMILES radical not set. - # SMILES doesn't code radicals. so, let's try to guess. - radicals[n] = True - if molecule._check_implicit(n, h): # radical form is valid - radicalized.append(n) - hydrogens[n] = h - elif ignore: # radical state also has errors. - radicals[n] = False # reset radical state - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif hc != h: # H count mismatch. 
- if hyb(n) == 4: - if not h and not charges[n] and not radicals[n] and atoms[n].atomic_number in (5, 6, 7, 15) and \ - sum(b.order != 8 for b in bonds[n].values()) == 2: - # c[c]c - aromatic B,C,N,P radical - hydrogens[n] = 0 - radicals[n] = True - radicalized.append(n) - elif ignore: - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif molecule._check_implicit(n, h): # set another possible implicit state. probably Al, P - hydrogens[n] = h - elif not radicals[n]: # CXSMILES radical is not set. try radical form - radicals[n] = True - if molecule._check_implicit(n, h): - hydrogens[n] = h - radicalized.append(n) - # radical state also has errors. - elif ignore: - radicals[n] = False # reset radical state - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif ignore: # just ignore it - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - - if ignore_carbon_radicals: - for n in radicalized: - if atoms[n].atomic_number == 6: - radicals[n] = False - hydrogens[n] += 1 - log.append(f'carbon radical {n} replaced with implicit hydrogen') - - if implicit_mismatch: - molecule.meta['chython_implicit_mismatch'] = implicit_mismatch - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log if ignore_stereo: return diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index 645d87e9..6bf1eb8c 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 
2022, 2023 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from re import compile, fullmatch, match, search +from re import compile, match, search from .._mdl import common_isotopes from ...containers.bonds import QueryBond from ...exceptions import IncorrectSmiles, IncorrectSmarts @@ -243,7 +243,7 @@ def _tokenize(smiles): def _atom_parse(token): # [isotope]Element[element][@[@]][H[n]][+-charge][:mapping] - _match = fullmatch(atom_re, token) + _match = atom_re.fullmatch(token) if _match is None: raise IncorrectSmiles(f'atom token invalid {token}') isotope, element, stereo, hydrogen, charge, mapping = _match.groups() @@ -275,16 +275,14 @@ def _atom_parse(token): mapping = int(mapping[1:]) except ValueError: raise IncorrectSmiles('invalid mapping token') - else: - mapping = 0 if element in ('c', 'n', 'o', 'p', 's', 'as', 'se', 'b', 'te'): _type = 8 element = element.capitalize() else: _type = 0 - return _type, {'element': element, 'isotope': isotope, 'mapping': mapping, 'charge': charge, 'is_radical': False, - 'x': 0., 'y': 0., 'z': 0., 'hydrogen': hydrogen, 'stereo': stereo} + return _type, {'element': element, 'isotope': isotope, 'parsed_mapping': mapping, 'charge': charge, + 'implicit_hydrogens': hydrogen, 'stereo': stereo} def _query_parse(token): @@ -372,8 +370,7 @@ def smiles_tokenize(smi): out = [] for token_type, token in tokens: if token_type in (0, 8): # simple atom - out.append((token_type, {'element': token, 'isotope': None, 'mapping': 0, 'charge': 0, 'is_radical': False, - 'x': 0., 'y': 0., 'z': 0., 'hydrogen': None, 'stereo': None})) + out.append((token_type, {'element': token})) elif token_type == 5: out.append(_atom_parse(token)) elif token_type == 10: diff --git a/chython/periodictable/base/element.py 
b/chython/periodictable/base/element.py index 943d1128..04deaba2 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -25,11 +25,14 @@ class Element(ABC): __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', - '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_xyz', + '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring') __class_cache__ = {} - def __init__(self, isotope: Optional[int] = None): + def __init__(self, isotope: Optional[int] = None, *, + charge: int = 0, is_radical: bool = False, x: float = 0, y: float = 0, + implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, + parsed_mapping: Optional[int] = None): """ Element object with specified isotope @@ -41,11 +44,12 @@ def __init__(self, isotope: Optional[int] = None): elif isotope is not None: raise TypeError('integer isotope number required') self._isotope = isotope - self._charge = 0 - self._is_radical = False - self._x = self._y = 0 - self._implicit_hydrogens = None - self._stereo = None + self._charge = charge + self._is_radical = is_radical + self._x, self._y = x, y + self._implicit_hydrogens = implicit_hydrogens + self._stereo = stereo + self._parsed_mapping = parsed_mapping self._explicit_hydrogens = 0 self._neighbors = 0 From e8a4986e65b0315fb8a84f77fe802753590d08f1 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 2 Nov 2024 20:09:07 +0100 Subject: [PATCH 09/68] some progress in stereo --- chython/algorithms/isomorphism.py | 2 +- chython/algorithms/stereo/graph.py | 96 ++++++++++++++------------- chython/algorithms/stereo/molecule.py | 18 ++--- chython/files/_convert.py | 8 +-- chython/files/_mapping.py | 8 +-- chython/files/daylight/smarts.py | 19 +++--- chython/files/daylight/smiles.py | 9 ++- chython/files/daylight/tokenize.py | 57 +++++----------- chython/files/libinchi/wrapper.py | 4 +- 
chython/periodictable/base/element.py | 25 ++++--- chython/periodictable/base/query.py | 55 +++++++++------ 11 files changed, 151 insertions(+), 150 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index e2d95da3..a40188a6 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -361,7 +361,7 @@ def _cython_compiled_query(self): else: if isinstance(a, ListElement): v1 = v2 = 0 - for n in a._numbers: + for n in a.atomic_numbers: if n > 56: if n > 116: # Ts, Og n = 116 diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index 6fe91b76..bb7e5ebb 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -70,13 +70,10 @@ def tetrahedrons(self: 'Container') -> Tuple[int, ...]: """ Carbon sp3 atoms numbers. """ - atoms = self._atoms - bonds = self._bonds - tetra = [] - for n, atom in atoms.items(): + for n, atom in self._atoms.items(): if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: - env = bonds[n] + env = self._bonds[n] if all(int(x) == 1 for x in env.values()): if sum(int(x) for x in env.values()) > 4: continue @@ -157,14 +154,15 @@ def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): """ if s is None: s = self._atoms[n].stereo + if s is None: + raise KeyError order = self._stereo_tetrahedrons[n] if len(order) == 3: if len(env) == 4: # hydrogen atom passed to env - atoms = self._atoms # hydrogen always last in order try: - order = (*order, next(x for x in env if atoms[x].atomic_number == 1)) # see translate scheme + order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme except StopIteration: raise KeyError elif len(env) != 3: # pyramid or tetrahedron expected @@ -187,21 +185,24 @@ def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): :param nm: neighbor of last atom :param s: if None, use existing sign else translate given to 
molecule """ + try: + n0, n1, n2, n3 = self._stereo_cis_trans[(n, m)] + except KeyError: + n0, n1, n2, n3 = self._stereo_cis_trans[(m, n)] + n, m = m, n # in alkenes sign not order depended + nn, nm = nm, nn + if s is None: - try: - s = self._cis_trans_stereo[(n, m)] - except KeyError: - s = self._cis_trans_stereo[(m, n)] - n, m = m, n # in alkenes sign not order depended - nn, nm = nm, nn + i, j = self._stereo_cis_trans_centers[n] + s = self._bonds[i][j].stereo + if s is None: + raise KeyError - atoms = self._atoms - n0, n1, n2, n3 = self._stereo_cis_trans[(n, m)] if nn == n0: # same start t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError @@ -209,23 +210,23 @@ def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: t0 = 2 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError @@ -246,15 +247,16 @@ def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): :param s: if None, use existing sign else translate given to molecule """ if s is None: - s = self._allenes_stereo[c] + s = self._atoms[c].stereo + if s is None: + raise KeyError - atoms = 
self._atoms n0, n1, n2, n3 = self._stereo_allenes[c] if nn == n0: # same start t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError @@ -262,23 +264,23 @@ def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: t0 = 2 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError @@ -388,21 +390,25 @@ def _stereo_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[in """ Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends """ - return {(n, m): env for (n, *mid, m), env in self._stereo_cumulenes.items() if not len(mid) % 2} - - @cached_property - def _stereo_cis_trans_paths(self) -> Dict[Tuple[int, int], Tuple[int, ...]]: - return {(path[0], path[-1]): path for path in self._stereo_cumulenes if not len(path) % 2} + stereo = {} + for path, env in self._stereo_cumulenes.items(): + if len(path) % 2: + continue + stereo[(path[0], path[-1])] = env + return stereo @cached_property - def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: """ - Cis-Trans 
terminal atoms to cis-trans key mapping + Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. """ terminals = {} - for nm in self._stereo_cis_trans_paths: - n, m = nm - terminals[n] = terminals[m] = nm + for path in self._stereo_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = (path[i - 1], path[i]) return terminals @cached_property @@ -411,8 +417,10 @@ def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: Cis-Trans terminal atoms counterparts """ counterpart = {} - for nm in self._stereo_cis_trans_paths: - n, m = nm + for path in self._stereo_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] counterpart[n] = m counterpart[m] = n return counterpart @@ -439,11 +447,7 @@ def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: """ Allene center atom to terminals mapping """ - return {c: (path[0], path[-1]) for c, path in self._stereo_allenes_paths.items()} - - @cached_property - def _stereo_allenes_paths(self) -> Dict[int, Tuple[int, ...]]: - return {path[len(path) // 2]: path for path in self._stereo_cumulenes if len(path) % 2} + return {path[len(path) // 2]: (path[0], path[-1]) for path in self._stereo_cumulenes if len(path) % 2} __all__ = ['Stereo'] diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo/molecule.py index 016df003..7c443a0b 100644 --- a/chython/algorithms/stereo/molecule.py +++ b/chython/algorithms/stereo/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -434,11 +434,9 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): return solved def __wedge_sign(self: 'MoleculeContainer', order): - plane = self._plane - if order[-1]: # allene s = self._translate_allene_sign(order[-2], *order[:2]) - v = _allene_sign(1, plane[order[2]], plane[order[3]], plane[order[1]]) + v = _allene_sign(1, self._atoms[order[2]].xy, self._atoms[order[3]].xy, self._atoms[order[1]].xy) if not v: logger.info(f'need 2d clean. allenes wedge stereo ambiguous for atom {order[-2]}') if s: @@ -450,11 +448,15 @@ def __wedge_sign(self: 'MoleculeContainer', order): s = self._translate_tetrahedron_sign(n, order[:-2]) # need recalculation if XY changed if len(order) == 5: - v = _pyramid_sign((*plane[n], 0), - (*plane[order[0]], 1), (*plane[order[1]], 0), (*plane[order[2]], 0)) + v = _pyramid_sign((*self._atoms[n].xy, 0), + (*self._atoms[order[0]].xy, 1), + (*self._atoms[order[1]].xy, 0), + (*self._atoms[order[2]].xy, 0)) else: - v = _pyramid_sign((*plane[order[3]], 0), - (*plane[order[0]], 1), (*plane[order[1]], 0), (*plane[order[2]], 0)) + v = _pyramid_sign((*self._atoms[order[3]].xy, 0), + (*self._atoms[order[0]].xy, 1), + (*self._atoms[order[1]].xy, 0), + (*self._atoms[order[2]].xy, 0)) if not v: logger.info(f'need 2d clean. 
tetrahedron wedge stereo ambiguous for atom {n}') if s: diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 819389e1..a450146e 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -30,16 +30,14 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False bonds = g._bonds mapping = data['mapping'] for n, atom in enumerate(data['atoms']): - if abs(atom['charge']) > 4: - raise ValueError('formal charge should be in range [-4, 4]') n = mapping[n] e = Element.from_symbol(atom.pop('element')) try: atoms[n] = e(**atom) - except ValueError: + except (ValueError, TypeError): if not ignore_bad_isotopes: raise - del atom['isotope'] # reset isotope mark on errors. + del atom['isotope'] # reset isotope mark on errors and try again. atoms[n] = e(**atom) bonds[n] = {} @@ -52,7 +50,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if n in bonds[m]: raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) - if any(a['z'] for a in data['atoms']): + if any(a.get('z') for a in data['atoms']): # store conformer g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] diff --git a/chython/files/_mapping.py b/chython/files/_mapping.py index e8d5915c..331eaa3e 100644 --- a/chython/files/_mapping.py +++ b/chython/files/_mapping.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -24,10 +24,10 @@ def postprocess_parsed_molecule(data, *, remap=False, ignore=True): if remap: remapped = list(range(1, len(data['atoms']) + 1)) else: - length = count(max(x['mapping'] for x in data['atoms']) + 1) + length = count(max(x.get('parsed_mapping') or 0 for x in data['atoms']) + 1) remapped, used = [], set() for n, atom in enumerate(data['atoms']): - m = atom['mapping'] + m = atom.get('parsed_mapping') if not m: remapped.append(next(length)) elif m in used: @@ -47,7 +47,7 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): for molecule in data[i]: used = set() for atom in molecule['atoms']: - m = atom['mapping'] + m = atom.get('parsed_mapping') if m: if m in used: if not ignore: diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 2885b8a2..4f095e03 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -21,7 +21,7 @@ from .parser import parser from .tokenize import smarts_tokenize from ...containers import QueryContainer -from ...periodictable import QueryElement +from ...periodictable import ListElement, QueryElement cx_radicals = compile(r'\^[1-7]:[0-9]+(?:,[0-9]+)*') @@ -104,16 +104,17 @@ def smarts(data: str): g = QueryContainer() mapping = {} - free = count(max(a['mapping'] for a in data['atoms']) + 1) + free = count(max(a.get('parsed_mapping', 0) for a in data['atoms']) + 1) for i, a in enumerate(data['atoms']): - mapping[i] = n = a.pop('mapping') or next(global_free_masked if a['masked'] else free) + mapping[i] = n = a.pop('parsed_mapping', 0) or next(global_free_masked if a.get('masked') else free) e = a.pop('element') - if it := a.pop('isotope'): - if isinstance(e, int): - e = QueryElement.from_atomic_number(e)(it) - else: - e = QueryElement.from_symbol(e)(it) - g.add_atom(e, n, **a) + if isinstance(e, int): + e = QueryElement.from_atomic_number(e) + elif isinstance(e, str): + e = 
QueryElement.from_symbol(e) + else: + e = ListElement(e) + g.add_atom(e(**a), n) for n, m, b in data['bonds']: g.add_bond(mapping[n], mapping[m], b) diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index d491c866..82687724 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -171,14 +171,14 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo: return - stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a['stereo']) is not None] + stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a.get('stereo')) is not None] if not stereo_atoms and not data['stereo_bonds']: return st = molecule._stereo_tetrahedrons sa = molecule._stereo_allenes sat = molecule._stereo_allenes_terminals - ctt = molecule._stereo_cis_trans_terminals + ctc = molecule._stereo_cis_trans_counterpart order = {mapping[n]: [mapping[m] for m in ms] for n, ms in data['order'].items()} @@ -203,9 +203,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): for n, ns in stereo_bonds.items(): if n in seen: continue - if n in ctt: - nm = ctt[n] - m = nm[1] if nm[0] == n else nm[0] + if n in ctc: + m = ctc[n] if m in stereo_bonds: seen.add(m) n2, s2 = stereo_bonds[m].popitem() diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index 6bf1eb8c..e8f3c7e6 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -17,10 +17,8 @@ # along with this program; if not, see . 
# from re import compile, match, search -from .._mdl import common_isotopes from ...containers.bonds import QueryBond from ...exceptions import IncorrectSmiles, IncorrectSmarts -from ...periodictable.element import ListElement # -,= OR bonds supported @@ -49,7 +47,6 @@ # 12: in ring bond -atomic_numbers = dict(enumerate(common_isotopes, 1)) iso_re = compile(r'^[0-9]+') chg_re = compile(r'[+-][1-4+-]?') mpp_re = compile(r':[1-9][0-9]*$') @@ -286,19 +283,18 @@ def _atom_parse(token): def _query_parse(token): + out = {} if isotope := match(iso_re, token): token = token[isotope.end():] # remove isotope substring - isotope = int(isotope.group()) + out['isotope'] = int(isotope.group()) if charge := search(chg_re, token): token = token[:charge.start()] + token[charge.end():] # remove charge substring - charge = charge_dict[charge.group()] - else: - charge = 0 + out['charge'] = charge_dict[charge.group()] + if mapping := search(mpp_re, token): token = token[:mapping.start()] - mapping = int(mapping.group()[1:]) - else: - mapping = 0 + out['parsed_mapping'] = int(mapping.group()[1:]) + if stereo := search(str_re, token): # drop stereo mark. 
unsupported token = token[:stereo.start()] + token[stereo.end():] @@ -308,35 +304,21 @@ def _query_parse(token): element = [int(x[1:]) if x.startswith('#') else x for x in element.split(',')] if len(element) == 1: element = element[0] - else: # only atoms supported - tmp = [] - for x in element: - if isinstance(x, int): - try: - tmp.append(atomic_numbers[x]) - except KeyError as e: - raise IncorrectSmiles('Invalid atomic number') from e - elif x in common_isotopes: - tmp.append(x) - else: - raise IncorrectSmarts('Invalid element symbol') - element = ListElement(tmp) else: raise IncorrectSmarts('Empty element') + out['element'] = element - hybridization = rings_sizes = neighbors = hydrogens = heteroatoms = None - masked = False for p in primitives[1:]: # parse hydrogens (h), neighbors (D), rings_sizes (r or !R), hybridization == 4 (a) if not p: continue elif p == 'a': # aromatic atom - hybridization = 4 + out['hybridization'] = 4 elif p == 'A': # ignore aliphatic mark. Ad-Hoc for Marwin. 
continue elif p == '!R': - rings_sizes = 0 + out['ring_sizes'] = 0 elif p == 'M': - masked = True + out['masked'] = True else: p = p.split(',') if len(p) != 1 and len({x[0] for x in p}) > 1: @@ -350,19 +332,16 @@ def _query_parse(token): raise IncorrectSmarts('Unsupported SMARTS primitive') if t == 'D': - neighbors = p + out['neighbors'] = p elif t == 'h': - hydrogens = p + out['implicit_hydrogens'] = p elif t == 'r': # r - rings_sizes = p + out['ring_sizes'] = p elif t == 'x': - heteroatoms = p + out['heteroatoms'] = p else: # z - hybridization = p - - return 0, {'element': element, 'isotope': isotope, 'mapping': mapping, 'charge': charge, 'is_radical': False, - 'heteroatoms': heteroatoms, 'hydrogens': hydrogens, 'neighbors': neighbors, - 'rings_sizes': rings_sizes, 'hybridization': hybridization, 'masked': masked} + out['hybridization'] = p + return 0, out def smiles_tokenize(smi): @@ -385,9 +364,7 @@ def smarts_tokenize(smi): out = [] for token_type, token in tokens: if token_type in (0, 8): # simple atom - out.append((0, {'element': token, 'isotope': None, 'mapping': 0, 'charge': 0, 'is_radical': False, - 'heteroatoms': None, 'hydrogens': None, 'neighbors': None, - 'rings_sizes': None, 'hybridization': None, 'masked': False})) + out.append((0, {'element': token})) elif token_type == 5: out.append(_query_parse(token)) else: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 0fb7daf3..55749f34 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -138,7 +138,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): st = molecule._stereo_tetrahedrons sa = molecule._stereo_allenes - ctt = molecule._stereo_cis_trans_terminals + ctc = molecule._stereo_cis_trans_counterpart stereo = [] for n, ngb, s in data['stereo_atoms']: @@ -151,7 +151,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): stereo.append((molecule.add_atom_stereo, n, nn + 1, mn + 1, s)) for n, m, 
nn, nm, s in data['stereo_cumulenes']: n += 1 - if n in ctt: + if n in ctc: stereo.append((molecule.add_cis_trans_stereo, n, m + 1, nn + 1, nm + 1, s)) while stereo: diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 04deaba2..56e9f3d3 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -30,7 +30,7 @@ class Element(ABC): __class_cache__ = {} def __init__(self, isotope: Optional[int] = None, *, - charge: int = 0, is_radical: bool = False, x: float = 0, y: float = 0, + charge: int = 0, is_radical: bool = False, x: float = 0., y: float = 0., implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, parsed_mapping: Optional[int] = None): """ @@ -38,15 +38,11 @@ def __init__(self, isotope: Optional[int] = None, *, :param isotope: Isotope number of element """ - if isinstance(isotope, int): - if isotope not in self.isotopes_distribution: - raise ValueError(f'isotope number {isotope} impossible or not stable for {self.atomic_symbol}') - elif isotope is not None: - raise TypeError('integer isotope number required') - self._isotope = isotope - self._charge = charge - self._is_radical = is_radical - self._x, self._y = x, y + self.isotope = isotope + self.charge = charge + self.is_radical = is_radical + self.x, self.y = x, y + self._implicit_hydrogens = implicit_hydrogens self._stereo = stereo self._parsed_mapping = parsed_mapping @@ -81,6 +77,15 @@ def isotope(self) -> Optional[int]: """ return self._isotope + @isotope.setter + def isotope(self, value: Optional[int]): + if isinstance(value, int): + if value not in self.isotopes_distribution: + raise ValueError(f'isotope number {value} impossible or not stable for {self.atomic_symbol}') + elif value is not None: + raise TypeError('integer isotope number required') + self._isotope = value + @property def atomic_mass(self) -> float: mass = self.isotopes_masses diff --git a/chython/periodictable/base/query.py 
b/chython/periodictable/base/query.py index 325c0947..2089bc17 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -49,10 +49,11 @@ def _validate(value, prop): class Query(ABC): __slots__ = ('_neighbors', '_hybridization', '_masked') - def __init__(self): - self._neighbors = () - self._hybridization = () - self._masked = False + def __init__(self, neighbors: Union[int, Tuple[int, ...], None] = None, + hybridization: Union[int, Tuple[int, ...], None] = None, masked: bool = False): + self.neighbors = neighbors + self.hybridization = hybridization + self.masked = masked @property @abstractmethod @@ -118,14 +119,16 @@ def __repr__(self): class ExtendedQuery(Query, ABC): __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens', '_stereo') - def __init__(self): - super().__init__() - self._charge = 0 - self._is_radical = False - self._heteroatoms = () - self._ring_sizes = () - self._implicit_hydrogens = () - self._stereo = None + def __init__(self, charge: int = 0, is_radical: bool = False, heteroatoms: Union[int, Tuple[int, ...], None] = None, + ring_sizes: Union[int, Tuple[int, ...], None] = None, + implicit_hydrogens: Union[int, Tuple[int, ...], None] = None, stereo: Optional[bool] = None, **kwargs): + super().__init__(**kwargs) + self.charge = charge + self.is_radical = is_radical + self.heteroatoms = heteroatoms + self.ring_sizes = ring_sizes + self.implicit_hydrogens = implicit_hydrogens + self._stereo = stereo @property def charge(self) -> int: @@ -292,14 +295,22 @@ def __hash__(self): class ListElement(ExtendedQuery): __slots__ = ('_elements', '__dict__') - def __init__(self, elements: List[str]): + def __init__(self, elements: List[str], **kwargs): """ Elements list """ if not isinstance(elements, (list, tuple)) or not elements: raise ValueError('invalid elements list') - super().__init__() - self._elements = tuple(elements) + tmp = [] + for x in elements: + if isinstance(x, int): + 
tmp.append(Element.from_atomic_number(x).__name__) + elif isinstance(x, str): + tmp.append(Element.from_symbol(x).__name__) + else: + raise ValueError(f'invalid element: {x}') + super().__init__(**kwargs) + self._elements = tuple(tmp) @property def atomic_symbol(self) -> str: @@ -366,11 +377,9 @@ def __repr__(self): class QueryElement(ExtendedQuery, ABC): __slots__ = ('_isotope',) - def __init__(self, isotope: Optional[int] = None): - if isotope is not None and not isinstance(isotope, int): - raise TypeError('isotope must be an int') - super().__init__() - self._isotope = isotope + def __init__(self, isotope: Optional[int] = None, **kwargs): + super().__init__(**kwargs) + self.isotope = isotope def __repr__(self): if self.isotope: @@ -392,6 +401,12 @@ def atomic_number(self) -> int: def isotope(self): return self._isotope + @isotope.setter + def isotope(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise TypeError('isotope must be an int') + self._isotope = value + @classmethod def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: """ From 0252529b90ffb3d3ce45f3bdc3dae4d8e0ad5265 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 2 Nov 2024 20:15:35 +0100 Subject: [PATCH 10/68] revert --- chython/algorithms/stereo/graph.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index bb7e5ebb..8ad032fd 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -411,6 +411,19 @@ def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: terminals[n] = terminals[m] = (path[i - 1], path[i]) return terminals + @cached_property + def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal atoms to terminal pair mapping. 
+ """ + terminals = {} + for path in self._stereo_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + terminals[n] = terminals[m] = (n, m) + return terminals + @cached_property def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: """ From 932a30f8a0a422e4fe7904780e2037a10941f26f Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 3 Nov 2024 14:36:38 +0100 Subject: [PATCH 11/68] another portion of fixes. --- chython/algorithms/aromatics/kekule.py | 4 +- chython/algorithms/aromatics/thiele.py | 34 +++++----- chython/algorithms/calculate2d/__init__.py | 70 +++++++++---------- chython/algorithms/depict.py | 69 +++++++++---------- chython/algorithms/smiles.py | 25 +++---- chython/algorithms/standardize/molecule.py | 4 +- chython/algorithms/standardize/resonance.py | 2 +- chython/algorithms/stereo/graph.py | 5 +- chython/algorithms/stereo/molecule.py | 74 +++++++++++++-------- chython/containers/molecule.py | 52 ++++++++++++++- chython/files/MRVrw.py | 12 ++-- chython/files/RDFrw.py | 18 ++--- chython/files/SDFrw.py | 10 +-- chython/files/_convert.py | 13 ++-- chython/files/_mdl/stereo.py | 39 ++--------- chython/files/daylight/smarts.py | 3 +- chython/files/xyz.py | 19 ++---- chython/periodictable/base/element.py | 13 ---- chython/reactor/base.py | 4 +- 19 files changed, 240 insertions(+), 230 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index de51744b..f1df888c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -50,7 +50,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool atoms.add(n) atoms.add(m) for n in atoms: - self._calc_implicit(n) + self.calc_implicit(n) self.flush_cache() return True return fixed @@ -69,7 +69,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): atoms.add(n) atoms.add(m) for n in atoms: - copy._calc_implicit(n) + copy.calc_implicit(n) yield copy def 
__fix_rings(self: 'MoleculeContainer'): diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 43030a86..0b2ce586 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -56,9 +56,6 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: atoms = self._atoms bonds = self._bonds nsc = self.not_special_connectivity - sh = self.hybridization - charges = self._charges - hydrogens = self._hydrogens rings = defaultdict(set) # aromatic? skeleton. include quinones tetracycles = [] @@ -73,13 +70,13 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: # only B C N O P S with 2-3 neighbors. detects this: C1=CC=CP12=CC=CC=C2 if any(atoms[n].atomic_number not in (6, 7, 8, 16, 5, 15) or len(nsc[n]) > 3 for n in ring): continue - sp2 = sum(sh(n) == 2 for n in ring) + sp2 = sum(atoms[n].hybridization == 2 for n in ring) if sp2 == lr: # benzene like if lr == 4: # two bonds condensed aromatic rings tetracycles.append(ring) else: if fix_tautomers and lr % 2: # find potential pyrroles - acceptors.update(n for n in ring if atoms[n].atomic_number == 7 and not charges[n]) + acceptors.update(n for n in ring if (a := atoms[n]).atomic_number == 7 and not a.charge) n, *_, m = ring rings[n].add(m) rings[m].add(n) @@ -88,11 +85,12 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: rings[m].add(n) elif 4 < lr == sp2 + 1: # pyrroles, furanes, etc try: - n = next(n for n in ring if sh(n) == 1) + n = next(n for n in ring if atoms[n].hybridization == 1) except StopIteration: # exotic, just skip continue - an = atoms[n].atomic_number - if (c := charges[n]) == -1: + a = atoms[n] + an = a.atomic_number + if (c := a.charge) == -1: if an != 6 or 
lr != 5: # skip any but ferrocene continue elif c: # skip any charged @@ -149,8 +147,8 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: acceptors.discard(current) pyrroles.discard(start) pyrroles.add(current) - hydrogens[current] = 1 - hydrogens[start] = 0 + atoms[current]._implicit_hydrogens = 1 + atoms[start]._implicit_hydrogens = 0 break else: continue @@ -163,7 +161,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: else: # path not found continue for n, m, o in path: - bonds[n][m]._Bond__order = o # noqa + bonds[n][m]._order = o if not acceptors: break @@ -205,24 +203,24 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: for ring in tetracycles: if seen.issuperset(ring): n, *_, m = ring - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 for ring in rings: n, *_, m = ring - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 self.flush_cache() for ring in freaks: # aromatize rule based for q in freak_rules: if next(q.get_mapping(self, searching_scope=ring, automorphism_filter=False), None): n, *_, m = ring - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 break if freaks: self.flush_cache() # flush again diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index bef7b1f0..a787abc5 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -77,9 +77,11 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): else: bond_reduce = 1. 
- self_plane = self._plane + atoms = self._atoms for n, (x, y) in plane.items(): - self_plane[n] = (x / bond_reduce, y / bond_reduce) + a = atoms[n] + a._x = x / bond_reduce + a._y = y / bond_reduce if self.connected_components_count > 1: shift_x = 0. @@ -88,27 +90,28 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): self.__dict__.pop('__cached_method__repr_svg_', None) def _fix_plane_mean(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - plane = self._plane + atoms = self._atoms if component is None: - component = plane + component = atoms - left_atom = min(component, key=lambda x: plane[x][0]) - right_atom = max(component, key=lambda x: plane[x][0]) + left_atom = atoms[min(component, key=lambda x: atoms[x].x)] + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] - min_x = plane[left_atom][0] - shift_x - if len(self._atoms[left_atom].atomic_symbol) == 2: + min_x = left_atom.x - shift_x + if len(left_atom.atomic_symbol) == 2: min_x -= .2 - max_x = plane[right_atom][0] - min_x - min_y = min(plane[x][1] for x in component) - max_y = max(plane[x][1] for x in component) + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) + max_y = max(atoms[x].y for x in component) mean_y = (max_y + min_y) / 2 - shift_y for n in component: - x, y = plane[n] - plane[n] = (x - min_x, y - mean_y) + a = atoms[n] + a._x -= min_x + a._y -= mean_y - if -.18 <= plane[right_atom][1] <= .18: - factor = self._hydrogens[right_atom] + if -.18 <= right_atom.y <= .18: + factor = right_atom.implicit_hydrogens if factor == 1: max_x += .15 elif factor: @@ -116,21 +119,22 @@ def _fix_plane_mean(self: 'MoleculeContainer', shift_x: float, shift_y=0., compo return max_x def _fix_plane_min(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - plane = self._plane + atoms = self._atoms if component is None: - component = plane + component = atoms - right_atom = max(component, key=lambda x: 
plane[x][0]) - min_x = min(plane[x][0] for x in component) - shift_x - max_x = plane[right_atom][0] - min_x - min_y = min(plane[x][1] for x in component) - shift_y + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] + min_x = min(atoms[x].x for x in component) - shift_x + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) - shift_y for n in component: - x, y = plane[n] - plane[n] = (x - min_x, y - min_y) + a = atoms[n] + a._x -= min_x + a._y -= min_y - if shift_y - .18 <= plane[right_atom][1] <= shift_y + .18: - factor = self._hydrogens[right_atom] + if shift_y - .18 <= right_atom.y <= shift_y + .18: + factor = right_atom.implicit_hydrogens if factor == 1: max_x += .15 elif factor: @@ -138,21 +142,9 @@ def _fix_plane_min(self: 'MoleculeContainer', shift_x: float, shift_y=0., compon return max_x def __clean2d_prepare(self: 'MoleculeContainer', entry): - hydrogens = self._hydrogens - charges = self._charges - allenes_stereo = self._allenes_stereo - atoms_stereo = self._atoms_stereo - self._charges = self._hydrogens = {n: 0 for n in hydrogens} - self._atoms_stereo = self._allenes_stereo = {} - w = {n: random() for n in hydrogens} + w = {n: random() for n in self._atoms} w[entry] = -1 - try: - smiles, order = self._smiles(w.__getitem__, random=True, _return_order=True) - finally: - self._hydrogens = hydrogens - self._charges = charges - self._allenes_stereo = allenes_stereo - self._atoms_stereo = atoms_stereo + smiles, order = self._smiles(w.__getitem__, random=True, charges=False, stereo=False, _return_order=True) return ''.join(smiles).replace('~', '-'), order diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 4eab3f82..1189d32a 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2022 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # Copyright 2019-2020 Dinar Batyrshin # This file is part of chython. 
# @@ -206,17 +206,17 @@ def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, he :param clean2d: calculate coordinates if necessary. """ uid = str(uuid4()) - values = self._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + atoms = self._atoms.values() + min_x = min(a.x for a in atoms) + max_x = max(a.x for a in atoms) + min_y = min(a.y for a in atoms) + max_y = max(a.y for a in atoms) if clean2d and len(self) > 1 and max_y - min_y < .01 and max_x - min_x < 0.01: self.clean2d() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for a in atoms) + max_x = max(a.x for a in atoms) + min_y = min(a.y for a in atoms) + max_y = max(a.y for a in atoms) bonds = self.__render_bonds() atoms, define, masks = self.__render_atoms(uid) @@ -247,8 +247,8 @@ def _repr_svg_(self): return self.depict() def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): + atoms = self._atoms svg = [] - plane = self._plane double_space = _render_config['double_space'] triple_space = _render_config['triple_space'] wedge_space = _render_config['wedge_space'] @@ -260,8 +260,8 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): wedge[n].add(m) wedge[m].add(n) - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy ny, my = -ny, -my dx, dy = _rotate_vector(0, wedge_space, mx - nx, ny - my) @@ -272,8 +272,8 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): if m in wedge[n]: continue order = bond.order - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy ny, my = -ny, -my if order in (1, 4): svg.append(f' ') @@ -291,18 +291,18 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): f'stroke-dasharray="{dash1:.2f} 
{dash2:.2f}"/>') for ring in self.aromatic_rings: - cx = sum(plane[n][0] for n in ring) / len(ring) - cy = sum(plane[n][1] for n in ring) / len(ring) + cx = sum(atoms[n].x for n in ring) / len(ring) + cy = sum(atoms[n].y for n in ring) / len(ring) for n, m in zip(ring, ring[1:]): - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy aromatic = _render_aromatic_bond(nx, ny, mx, my, cx, cy) if aromatic: svg.append(aromatic) - nx, ny = plane[ring[-1]] - mx, my = plane[ring[0]] + nx, ny = atoms[ring[-1]].xy + mx, my = atoms[ring[0]].xy aromatic = _render_aromatic_bond(nx, ny, mx, my, cx, cy) if aromatic: svg.append(aromatic) @@ -310,10 +310,6 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): def __render_atoms(self: 'MoleculeContainer', uid): bonds = self._bonds - plane = self._plane - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens carbon = _render_config['carbon'] mapping = _render_config['mapping'] @@ -360,14 +356,13 @@ def __render_atoms(self: 'MoleculeContainer', uid): mask = [] for n, atom in self._atoms.items(): - x, y = plane[n] - y = -y + x, y = atom.x, -atom.y symbol = atom.atomic_symbol - if not bonds[n] or symbol != 'C' or carbon or charges[n] or radicals[n] or atom.isotope or n in cumulenes: - if charges[n]: + if not bonds[n] or symbol != 'C' or carbon or atom.charge or atom.is_radical or atom.isotope or n in cumulenes: + if atom.charge: others.append(f' ' - f'{_render_charge[charges[n]]}{"↑" if radicals[n] else ""}') - elif radicals[n]: + f'{_render_charge[atom.charge]}{"↑" if atom.is_radical else ""}') + elif atom.is_radical: others.append(f' ↑') if atom.isotope: others.append(f' ') - h = hydrogens[n] + h = atom.implicit_hydrogens if h == 1: h = 'H' elif h: @@ -463,11 +458,11 @@ def depict(self: 'ReactionContainer', *, width=None, height=None, clean2d: bool if clean2d: for m in self.molecules(): if len(m) > 1: - values = m._plane.values() # noqa - min_x = min(x for 
x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + atoms = m._atoms.values() + min_x = min(a.x for a in atoms) + max_x = max(a.x for a in atoms) + min_y = min(a.y for a in atoms) + max_y = max(a.y for a in atoms) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() self.fix_positions() diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 412c76e0..b400a259 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -476,19 +476,20 @@ def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): return '~' def __ct_map(self, adjacency): + stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} + if not stereo_bonds: + return {} ct_map = {} - cts = self._cis_trans_stereo - if not cts: - return ct_map + ctc = self._stereo_cis_trans_centers ctt = self._stereo_cis_trans_terminals sct = self._stereo_cis_trans - ctc = self._stereo_cis_trans_counterpart + ctcp = self._stereo_cis_trans_counterpart seen = set() for k, vs in adjacency.items(): seen.add(k) - if (ts := ctt.get(k)) and ts in cts: - env = sct[ts] + if (cs := ctc.get(k)) and stereo_bonds.issuperset(cs): + env = sct[ctt[k]] for v in vs: if v in env: if (k, v) in ct_map: @@ -497,11 +498,11 @@ def __ct_map(self, adjacency): s = ct_map[(k, x)] ct_map[(k, v)] = not s # X/C(/R)=, C(\X)(/R)=, C(=C(\X)/R)=C= ct_map[(v, k)] = s - if y := ctt.get(v): # =C(\X)/R=, C(\X)(/R=)= + if y := ctc.get(v): # =C(\X)/R=, C(\X)(/R=)= ct_map[v] = k seen.add(y) - elif ts in seen: - o = ctc[k] + elif cs in seen: + o = ctcp[k] on = ct_map[o] s = ct_map[(o, on)] if not self._translate_cis_trans_sign(k, o, v, on): @@ -509,17 +510,17 @@ def __ct_map(self, adjacency): ct_map[(k, v)] = s ct_map[k] = v ct_map[(v, k)] = not s # C/R=, R\1...C/1 - if y := ctt.get(v): + if y := ctc.get(v): ct_map[v] = k seen.add(y) else: # left entry to double bond - if y := 
ctt.get(v): # 1,3-diene case + if y := ctc.get(v): # 1,3-diene case ct_map[v] = k seen.add(y) ct_map[(v, k)] = True # R/C=, C\1=...R/1, C(/R=)=, C(=C(/R=))=C= ct_map[(k, v)] = False # first DOWN ct_map[k] = v - seen.add(ts) + seen.add(cs) return ct_map diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 89bf57f5..c9fb0893 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -426,7 +426,7 @@ def clean_isotopes(self: 'MoleculeContainer') -> bool: isotopes = [x for x in atoms.values() if x.isotope] if isotopes: for i in isotopes: - i._Core__isotope = None + i._isotope = None self.flush_cache() self.fix_stereo() return True @@ -436,7 +436,7 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): bonds = self._bonds charges = self._charges radicals = self._radicals - calc_implicit = self._calc_implicit + calc_implicit = self.calc_implicit log = [] fixed = set() diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 1270e3dd..696b977c 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -38,7 +38,7 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals charges = self._charges radicals = self._radicals bonds = self._bonds - calc_implicit = self._calc_implicit + calc_implicit = self.calc_implicit entries, exits, rads, constrains, nitrogen_cat, nitrogen_ani, sulfur_cat = self.__entries() hs = set() while len(rads) > 1: diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index 8ad032fd..59523deb 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -414,14 +414,15 @@ def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: @cached_property def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: """ - Cis-Trans terminal 
atoms to terminal pair mapping. + Cis-Trans terminal and central atoms to terminal pair mapping. """ terminals = {} for path in self._stereo_cumulenes: if len(path) % 2: continue n, m = path[0], path[-1] - terminals[n] = terminals[m] = (n, m) + i = len(path) // 2 + terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) return terminals @cached_property diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo/molecule.py index 7c443a0b..9415d551 100644 --- a/chython/algorithms/stereo/molecule.py +++ b/chython/algorithms/stereo/molecule.py @@ -204,19 +204,19 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): """ Calculate cis-trans stereo bonds from given 2d coordinates. Unusable for SMILES and INCHI. """ - cis_trans_stereo = self._cis_trans_stereo - plane = self._plane + atoms = self._atoms flag = False while self._chiral_cis_trans: - stereo = {} + stereo = False for nm in self._chiral_cis_trans: n, m = nm n1, m1, *_ = self._stereo_cis_trans[nm] - s = _cis_trans_sign(plane[n1], plane[n], plane[m], plane[m1]) + s = _cis_trans_sign(atoms[n1].xy, atoms[n].xy, atoms[m].xy, atoms[m1].xy) if s: - stereo[nm] = s > 0 + stereo = True + i, j = self._stereo_cis_trans_centers[n] + self._bonds[i][j]._stereo = s > 0 if stereo: - cis_trans_stereo.update(stereo) flag = True self.flush_stereo_cache() else: @@ -234,19 +234,21 @@ def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mar See and """ - if n not in self._atoms: + try: + atom = self._atoms[n] + except KeyError: raise AtomNotFound - if n in self._atoms_stereo or n in self._allenes_stereo: + if atom.stereo is not None: raise IsChiral if not isinstance(mark, bool): raise TypeError('stereo mark should be bool') if n in self._chiral_tetrahedrons: - self._atoms_stereo[n] = self._translate_tetrahedron_sign(n, env, mark) + atom._stereo = self._translate_tetrahedron_sign(n, env, mark) if clean_cache: self.flush_cache() elif n 
in self._chiral_allenes: - self._allenes_stereo[n] = self._translate_allene_sign(n, *env, mark) + atom._stereo = self._translate_allene_sign(n, *env, mark) if clean_cache: self.flush_cache() else: # only tetrahedrons supported @@ -272,15 +274,19 @@ def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: raise AtomNotFound if not isinstance(mark, bool): raise TypeError('stereo mark should be bool') - if (n, m) in self._cis_trans_stereo or (m, n) in self._cis_trans_stereo: + + if n not in self._stereo_cis_trans_counterpart or self._stereo_cis_trans_counterpart[n] != m: + raise NotChiral + i, j = self._stereo_cis_trans_centers[n] + if self._bonds[i][j].stereo is not None: raise IsChiral if (n, m) in self._chiral_cis_trans: - self._cis_trans_stereo[(n, m)] = self._translate_cis_trans_sign(n, m, n1, n2, mark) + self._bonds[i][j] = self._translate_cis_trans_sign(n, m, n1, n2, mark) if clean_cache: self.flush_cache() elif (m, n) in self._chiral_cis_trans: - self._cis_trans_stereo[(m, n)] = self._translate_cis_trans_sign(m, n, n2, n1, mark) + self._bonds[i][j] = self._translate_cis_trans_sign(m, n, n2, n1, mark) if clean_cache: self.flush_cache() else: @@ -372,7 +378,7 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): if env[3]: orders.append((env[3], env[0], *term[::-1], n, True)) space.append(orders) - for n, s in self._atoms_stereo.items(): + for n, s in atoms_stereo.items(): order = list(self._stereo_tetrahedrons[n]) orders = [(*order, n, False)] for _ in range(1, len(order)): @@ -478,12 +484,18 @@ def _chiral_allenes(self) -> Set[int]: @cached_property def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: - if not self._atoms_stereo and not self._allenes_stereo and not self._cis_trans_stereo: + stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} + if not 
stereo_atoms and not stereo_bonds: return self.atoms_order + morgan = self.atoms_order.copy() - atoms_stereo = set(self._atoms_stereo) - cis_trans_stereo = set(self._cis_trans_stereo) - allenes_stereo = set(self._allenes_stereo) + atoms_stereo = stereo_atoms.intersection(self.tetrahedrons) + allenes_stereo = stereo_atoms - atoms_stereo + + cis_trans_terminals = self._stereo_cis_trans_terminals + cis_trans_stereo = {cis_trans_terminals[n] for n in stereo_bonds} + while True: # try iteratively differentiate stereo atoms. morgan, atoms_stereo, cis_trans_stereo, allenes_stereo, atoms_groups, cis_trans_groups, allenes_groups = \ @@ -599,6 +611,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): cis_trans = self._stereo_cis_trans allenes_centers = self._stereo_allenes_centers cis_trans_terminals = self._stereo_cis_trans_terminals + cis_trans_centers = self._stereo_cis_trans_centers morgan = self._chiral_morgan # find new chiral atoms and bonds. @@ -623,20 +636,22 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): if len(path) % 2: chiral_a.add(path[len(path) // 2]) else: - chiral_c.add((n, m)) + chiral_c.add(n) stereogenic.add(n) stereogenic.add(m) # ring cumulenes always chiral. can be already added. for nm in self._rings_cumulenes: n, m = nm if any(len(x) < 8 for x in atoms_rings[n]): # skip small rings. - if nm in chiral_c: # remove already added small rings cumulenes. - chiral_c.discard(nm) + if n in chiral_c: # remove already added small rings cumulenes. 
+ chiral_c.discard(n) + if m in chiral_c: + chiral_c.discard(m) elif n in allenes_centers and (c := allenes_centers[n]) in chiral_a: chiral_a.discard(c) continue elif nm in cis_trans: - chiral_c.add(nm) + chiral_c.add(n) else: chiral_a.add(allenes_centers[n]) pseudo[m] = n @@ -697,13 +712,18 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): elif n in allenes_centers: chiral_a.add(allenes_centers[n]) else: - chiral_c.add(cis_trans_terminals[n]) + chiral_c.add(n) # skip already marked. - chiral_t.difference_update(self._atoms_stereo) - chiral_a.difference_update(self._allenes_stereo) - chiral_c.difference_update(self._cis_trans_stereo) - return chiral_t, chiral_c, chiral_a + stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + chiral_t.difference_update(stereo_atoms) + chiral_a.difference_update(stereo_atoms) + diff = set() + for n in chiral_c: + i, j = cis_trans_centers[n] + if self._bonds[i][j].stereo is None: + diff.add(cis_trans_terminals[n]) + return chiral_t, diff, chiral_a def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan, atoms_stereo, cis_trans_stereo, allenes_stereo): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 09fa158a..c96fb713 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -693,7 +693,55 @@ def _augmented_substructure(self, atoms: Iterable[int], deep: int): nodes.append(n) return nodes - def _calc_implicit(self, n: int): + def fix_labels(self, recalculate_hydrogens=True): + """ + Fix molecule internal represenation + """ + if not self._changed: + return + + self.calc_labels() # refresh all labels + + if recalculate_hydrogens: + for n in self._changed: + self.calc_implicit(n) # fix Hs count + self._changed = None + + def calc_labels(self): + atoms = self._atoms + for n, m_bond in self._bonds.items(): + neighbors = 0 + heteroatoms = 0 + hybridization = 1 + explicit_hydrogens = 0 + for m, bond in 
m_bond.items(): + order = bond.order + if order == 8: + continue + elif order == 4: + hybridization = 4 + elif hybridization != 4: + if order == 3: + hybridization = 3 + elif order == 2: + if hybridization == 1: + hybridization = 2 + elif hybridization == 2: + hybridization = 3 + + neighbors += 1 + an = atoms[m].atomic_number + if an == 1: + explicit_hydrogens += 1 + elif an != 6: + heteroatoms += 1 + atom = atoms[n] + atom._neighbors = neighbors + atom._heteroatoms = heteroatoms + atom._hybridization = hybridization + atom._explicit_hydrogens = explicit_hydrogens + + def calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules """ @@ -746,7 +794,7 @@ def _calc_implicit(self, n: int): return atom._implicit_hydrogens = None # rule not found - def _check_implicit(self, n: int, h: int) -> bool: + def check_implicit(self, n: int, h: int) -> bool: atom = self._atoms[n] if atom.atomic_number == 1: # hydrogen nether has implicit H return h == 0 diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index c8db572a..0a589410 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -138,8 +138,8 @@ def read_structure(self, *, current: bool = True): postprocess_parsed_molecule(tmp, remap=self.__remap, ignore=self.__ignore) parse_sgroup(data, tmp) mol = create_molecule(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self.__ignore, ignore_stereo=self.__ignore_stereo, - calc_cis_trans=self.__calc_cis_trans) + if not self.__ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) mol.meta.update(meta) return mol elif 'reaction' in data and isinstance(data['reaction'], dict): @@ -171,9 +171,9 @@ def read_structure(self, *, current: bool = True): postprocess_parsed_reaction(tmp, remap=self.__remap, ignore=self.__ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=self.__ignore, ignore_stereo=self.__ignore_stereo, - calc_cis_trans=self.__calc_cis_trans) + if not self.__ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) rxn.meta.update(meta) return rxn else: diff --git a/chython/files/RDFrw.py b/chython/files/RDFrw.py index 0d4475bc..62bebbae 100644 --- a/chython/files/RDFrw.py +++ b/chython/files/RDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2019 Dinar Batyrshin # This file is part of chython. 
# @@ -74,9 +74,9 @@ def read_structure(self, *, current=True) -> Union[ReactionContainer, MoleculeCo postprocess_parsed_reaction(tmp, remap=self._remap, ignore=self._ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) if meta: rxn.meta.update(meta) return rxn @@ -87,8 +87,8 @@ def read_structure(self, *, current=True) -> Union[ReactionContainer, MoleculeCo postprocess_parsed_molecule(tmp) mol = create_molecule(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) if meta: mol.meta.update(meta) return mol @@ -289,9 +289,9 @@ def mdl_rxn(data: str, /, *, ignore=True, calc_cis_trans=False, ignore_stereo=Fa postprocess_parsed_reaction(tmp, remap=remap, ignore=ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=ignore_bad_isotopes, _m_cls=_m_cls, _r_cls=_r_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - calc_cis_trans=calc_cis_trans) + if not ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=calc_cis_trans) return rxn diff --git a/chython/files/SDFrw.py b/chython/files/SDFrw.py 
index 6ef8e638..04edb0ad 100644 --- a/chython/files/SDFrw.py +++ b/chython/files/SDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -71,8 +71,8 @@ def read_structure(self, *, current=True) -> MoleculeContainer: postprocess_parsed_molecule(tmp, remap=self._remap, ignore=self._ignore) mol = create_molecule(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) meta = self.read_metadata() if meta: mol.meta.update(meta) @@ -213,8 +213,8 @@ def mdl_mol(data: str, /, *, ignore=True, calc_cis_trans=False, ignore_stereo=Fa postprocess_parsed_molecule(tmp, remap=remap, ignore=ignore) mol = create_molecule(tmp, ignore_bad_isotopes=ignore_bad_isotopes, _cls=_cls) - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - calc_cis_trans=calc_cis_trans) + if not ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=calc_cis_trans) return mol diff --git a/chython/files/_convert.py b/chython/files/_convert.py index a450146e..6da1ffd6 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -50,6 +50,9 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if n in bonds[m]: raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) + + g.calc_labels() # set all labels except rings + if any(a.get('z') for a in data['atoms']): # store conformer g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] @@ -70,13 +73,13 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if a.implicit_hydrogens is None: # 
let's try to calculate. in case of errors just keep as is. radicals in smiles should be in [brackets], # thus has implicit Hs value - g._calc_implicit(n) + g.calc_implicit(n) elif keep_implicit: # keep given Hs count as is continue else: # recheck given Hs count h = a.implicit_hydrogens # parsed Hs - g._calc_implicit(n) # recalculate + g.calc_implicit(n) # recalculate if a.implicit_hydrogens is None: # atom has invalid valence or aromatic ring. if a.hybridization == 4: # this is aromatic ring. just restore given H count. @@ -91,7 +94,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif not keep_radicals and not a.is_radical: # CXSMILES radical not set. # SMILES doesn't code radicals. so, let's try to guess. a._is_radical = True - if g._check_implicit(n, h): # radical form is valid + if g.check_implicit(n, h): # radical form is valid radicalized.append(n) a._implicit_hydrogens = h elif ignore: # radical state also has errors. @@ -114,11 +117,11 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif g._check_implicit(n, h): # set another possible implicit state. probably Al, P + elif g.check_implicit(n, h): # set another possible implicit state. probably Al, P a._implicit_hydrogens = h elif not keep_radicals and not a.is_radical: # CXSMILES radical is not set. try radical form a._is_radical = True - if g._check_implicit(n, h): + if g.check_implicit(n, h): a._implicit_hydrogens = h radicalized.append(n) # radical state also has errors. 
diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py index 67dd52aa..ce9a651c 100644 --- a/chython/files/_mdl/stereo.py +++ b/chython/files/_mdl/stereo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,47 +19,16 @@ from ...exceptions import NotChiral, IsChiral, ValenceError -def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, calc_cis_trans=False, - keep_implicit=False): +def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans=False): + if ignore_stereo: + return mapping = data['mapping'] - hydrogens = molecule._hydrogens - hyb = molecule.hybridization - implicit_mismatch = {} if 'chython_parsing_log' in molecule.meta: log = molecule.meta['chython_parsing_log'] else: log = [] - for n, h in data['hydrogens'].items(): - n = mapping[n] - if keep_implicit: # override any calculated hydrogens count. - hydrogens[n] = h - if (hc := hydrogens[n]) is None: # aromatic rings or valence errors - if hyb(n) == 4: # this is aromatic rings. just store given H count. - hydrogens[n] = h - elif hc != h: - if hyb(n) == 4: - if ignore: - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif molecule._check_implicit(n, h): # set another possible implicit state. 
probably Al, P - hydrogens[n] = h - elif ignore: # just ignore it - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - - if implicit_mismatch: - molecule.meta['chython_implicit_mismatch'] = implicit_mismatch - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log - if ignore_stereo: - return - if calc_cis_trans: molecule.calculate_cis_trans_from_2d() diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 4f095e03..3a409505 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # +from functools import partial from itertools import count from re import compile, findall, search from .parser import parser @@ -113,7 +114,7 @@ def smarts(data: str): elif isinstance(e, str): e = QueryElement.from_symbol(e) else: - e = ListElement(e) + e = partial(ListElement, e) g.add_atom(e(**a), n) for n, m, b in data['bonds']: diff --git a/chython/files/xyz.py b/chython/files/xyz.py index 42ec82e7..612415bc 100644 --- a/chython/files/xyz.py +++ b/chython/files/xyz.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -35,22 +35,17 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, atoms = mol._atoms bonds = mol._bonds - plane = mol._plane - hydrogens = mol._hydrogens - radicals = mol._radicals for n, (a, x, y, z) in enumerate(matrix, 1): atoms[n] = atom = Element.from_symbol(a)() - atom._attach_graph(mol, n) bonds[n] = {} - plane[n] = (x, y) + atom.x = x + atom.y = y + atom._implicit_hydrogens = 0 conformer[n] = (x, y, z) - hydrogens[n] = 0 # implicit hydrogens not supported. - radicals[n] = False # set default value - if atom_charge is None or None in atom_charge: - mol._charges = {n: 0 for n in atoms} # reset charges - else: - mol._charges = dict(enumerate(atom_charge, 1)) + if atom_charge is not None and None not in atom_charge: + for n, c in enumerate(atom_charge, 1): + atoms[n]._charge = c charge = sum(atom_charge) pb = possible_bonds(array(list(conformer.values())), array([a.atomic_radius for a in atoms.values()]), diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 56e9f3d3..6b89b226 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -47,13 +47,6 @@ def __init__(self, isotope: Optional[int] = None, *, self._stereo = stereo self._parsed_mapping = parsed_mapping - self._explicit_hydrogens = 0 - self._neighbors = 0 - self._heteroatoms = 0 - self._hybridization = 1 - self._ring_sizes = () - self._in_ring = False - def __repr__(self): if self.isotope: return f'{self.__class__.__name__}({self.isotope})' @@ -273,12 +266,6 @@ def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': copy._ring_sizes = self.ring_sizes copy._in_ring = self.in_ring else: - copy._explicit_hydrogens = 0 - copy._neighbors = 0 - copy._heteroatoms = 0 - copy._hybridization = 1 - copy._ring_sizes = () - copy._in_ring = False if hydrogens: copy._implicit_hydrogens = self.implicit_hydrogens else: diff 
--git a/chython/reactor/base.py b/chython/reactor/base.py index 30212b08..073713e4 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -100,7 +100,7 @@ def _patcher(self, structure: MoleculeContainer, mapping): # replace atom copy._atoms[n] = a = atom.copy() # noqa a._attach_graph(copy, n) # noqa - copy._calc_implicit(n) # noqa + copy.calc_implicit(n) # noqa if self.__fix_rings: copy.kekule() if not copy.thiele(fix_tautomers=self.__fix_tautomers): @@ -194,7 +194,7 @@ def __prepare_skeleton(self, structure, mapping): new._hydrogens.update(keep_hydrogens) # noqa for n in new: if n not in keep_hydrogens: - new._calc_implicit(n) # noqa + new.calc_implicit(n) # noqa return new def __set_stereo(self, new, structure, mapping): From 0e33370e4d85337c5685ca74f313258b650d4fe0 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 8 Nov 2024 16:27:06 +0100 Subject: [PATCH 12/68] Add delta_isotope support for elements Implemented delta_isotope to manage isotopic modifications and added corresponding mdl_isotope properties for multiple elements. Removed common_isotopes and updated relevant assertions for consistent isotope handling. Also updated copyright years for multiple files. 
--- chython/algorithms/aromatics/kekule.py | 2 +- chython/algorithms/calculate2d/__init__.py | 2 +- chython/algorithms/isomorphism.py | 16 +-- chython/files/_mdl/__init__.py | 4 +- chython/files/_mdl/mol.py | 26 +---- chython/files/libinchi/wrapper.py | 20 ++-- chython/periodictable/__init__.py | 28 +++-- chython/periodictable/base/element.py | 13 ++- chython/periodictable/groupI.py | 28 +++++ chython/periodictable/groupII.py | 24 ++++ chython/periodictable/groupIII.py | 128 +++++++++++++++++++++ chython/periodictable/groupIV.py | 16 +++ chython/periodictable/groupIX.py | 16 +++ chython/periodictable/groupV.py | 16 +++ chython/periodictable/groupVI.py | 16 +++ chython/periodictable/groupVII.py | 16 +++ chython/periodictable/groupVIII.py | 16 +++ chython/periodictable/groupX.py | 16 +++ chython/periodictable/groupXI.py | 16 +++ chython/periodictable/groupXII.py | 16 +++ chython/periodictable/groupXIII.py | 24 ++++ chython/periodictable/groupXIV.py | 24 ++++ chython/periodictable/groupXV.py | 24 ++++ chython/periodictable/groupXVI.py | 24 ++++ chython/periodictable/groupXVII.py | 24 ++++ chython/periodictable/groupXVIII.py | 28 +++++ 26 files changed, 526 insertions(+), 57 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index f1df888c..5a7cc494 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index a787abc5..c8fe17a5 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019, 2020 Dinar Batyrshin # This file is part of chython. # diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index a40188a6..ce9193bc 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -185,15 +185,6 @@ def _cython_compiled_structure(self): # long IV: # ring_sizes: not-in-ring bit, 3-atom ring, 4-...., 65-atom ring - from ..files._mdl.mol import common_isotopes - - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - neighbors = self.neighbors - heteroatoms = self.heteroatoms - rings_sizes = self.atoms_rings_sizes - hybridization = self.hybridization mapping = {} numbers = [] @@ -204,7 +195,7 @@ def _cython_compiled_structure(self): for i, (n, a) in enumerate(self._atoms.items()): mapping[n] = i numbers.append(n) - v2 = 1 << (hybridization(n) - 1) + v2 = 1 << (a.hybridization - 1) if (an := a.atomic_number) > 56: if an > 116: # Ts, Og an = 116 @@ -214,7 +205,7 @@ def _cython_compiled_structure(self): v1 = 1 << (57 - an) if a.isotope: - v3 = 1 << (a.isotope - common_isotopes[a.atomic_symbol] + 54) + v3 = 1 << (a.isotope - a.mdl_isotope + 54) if radicals[n]: v3 |= 0x200000000000 else: @@ -337,7 +328,6 @@ def _cython_compiled_query(self): # padding: 1 bit # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit - from ..files._mdl.mol import common_isotopes _components, _closures = self._compiled_query components = [] @@ -378,7 +368,7 @@ def _cython_compiled_query(self): v1 = 1 << (57 - n) v2 = 0 if a.isotope: - v3 = 1 << 
(a.isotope - common_isotopes[a.atomic_symbol] + 54) + v3 = 1 << (a.isotope - a.mdl_isotope + 54) if a.is_radical: v3 |= 0x200000000000 else: diff --git a/chython/files/_mdl/__init__.py b/chython/files/_mdl/__init__.py index d941f381..2310481a 100644 --- a/chython/files/_mdl/__init__.py +++ b/chython/files/_mdl/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .mol import parse_mol_v2000, common_isotopes +from .mol import parse_mol_v2000 from .emol import parse_mol_v3000 from .rxn import parse_rxn_v2000 from .erxn import parse_rxn_v3000 diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index 3879b7ea..3e15cbf9 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -19,19 +19,6 @@ from ...exceptions import EmptyMolecule, InvalidCharge, InvalidV2000 -common_isotopes = {'H': 1, 'He': 4, 'Li': 7, 'Be': 9, 'B': 11, 'C': 12, 'N': 14, 'O': 16, 'F': 19, 'Ne': 20, 'Na': 23, - 'Mg': 24, 'Al': 27, 'Si': 28, 'P': 31, 'S': 32, 'Cl': 35, 'Ar': 40, 'K': 39, 'Ca': 40, 'Sc': 45, - 'Ti': 48, 'V': 51, 'Cr': 52, 'Mn': 55, 'Fe': 56, 'Co': 59, 'Ni': 59, 'Cu': 64, 'Zn': 65, 'Ga': 70, - 'Ge': 73, 'As': 75, 'Se': 79, 'Br': 80, 'Kr': 84, 'Rb': 85, 'Sr': 88, 'Y': 89, 'Zr': 91, 'Nb': 93, - 'Mo': 96, 'Tc': 98, 'Ru': 101, 'Rh': 103, 'Pd': 106, 'Ag': 108, 'Cd': 112, 'In': 115, 'Sn': 119, - 'Sb': 122, 'Te': 128, 'I': 127, 'Xe': 131, 'Cs': 133, 'Ba': 137, 'La': 139, 'Ce': 140, 'Pr': 141, - 'Nd': 144, 'Pm': 145, 'Sm': 150, 'Eu': 152, 'Gd': 157, 'Tb': 159, 'Dy': 163, 'Ho': 165, 'Er': 167, - 'Tm': 169, 'Yb': 173, 'Lu': 175, 'Hf': 178, 'Ta': 181, 'W': 184, 'Re': 186, 'Os': 190, 'Ir': 192, - 'Pt': 195, 'Au': 197, 'Hg': 201, 'Tl': 204, 'Pb': 207, 'Bi': 209, 'Po': 209, 'At': 210, 'Rn': 222, - 'Fr': 223, 'Ra': 226, 'Ac': 227, 'Th': 232, 'Pa': 231, 'U': 238, 'Np': 237, 'Pu': 244, 'Am': 243, - 'Cm': 247, 'Bk': 247, 'Cf': 251, 'Es': 252, 'Fm': 257, 'Md': 258, 'No': 259, 'Lr': 260, 'Rf': 261, - 'Db': 270, 'Sg': 269, 'Bh': 270, 'Hs': 270, 'Mt': 278, 'Ds': 281, 'Rg': 281, 'Cn': 285, 'Nh': 278, - 'Fl': 289, 'Mc': 289, 'Lv': 293, 'Ts': 297, 'Og': 294} _ctf_data = {'R': 'is_radical', 'C': 'charge', 'I': 'isotope'} _charge_map = {' 0': 0, ' 1': 3, ' 2': 2, ' 3': 1, ' 4': 0, ' 5': -1, ' 6': -2, ' 7': -3} @@ -59,6 +46,7 @@ def parse_mol_v2000(data): raise InvalidCharge element = line[31:34].strip() isotope = line[34:36] + delta_isotope = None if element in 'AL': raise ValueError('queries not supported') @@ -68,17 +56,15 @@ def parse_mol_v2000(data): raise ValueError('isotope on deuterium atom') isotope = 2 elif isotope != ' 0': - try: - isotope = common_isotopes[element] + int(isotope) - except KeyError: - 
raise ValueError('invalid element symbol') + delta_isotope = int(isotope) + isotope = None else: isotope = None mapping = line[60:63] atoms.append({'element': element, 'charge': charge, 'isotope': isotope, 'is_radical': False, 'mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), - 'z': float(line[20:30])}) + 'z': float(line[20:30]), 'delta_isotope': delta_isotope}) for line in data[4 + atoms_count: 4 + atoms_count + bonds_count]: a1, a2 = int(line[0:3]) - 1, int(line[3:6]) - 1 @@ -157,4 +143,4 @@ def parse_mol_v2000(data): 'meta': None, 'log': log} -__all__ = ['parse_mol_v2000', 'common_isotopes'] +__all__ = ['parse_mol_v2000'] diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 55749f34..a3504a0b 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,6 @@ from sysconfig import get_platform from warnings import warn from .._convert import create_molecule -from .._mdl import common_isotopes from ...containers import MoleculeContainer from ...containers.bonds import Bond from ...exceptions import ValenceError, IsChiral, NotChiral @@ -54,8 +53,8 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'hydrogens': atom.implicit_hydrogens, 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, - 't': atom.implicit_tritium}) + 'hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, + 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, 't': atom.implicit_tritium}) for k in range(atom.num_bonds): m = atom.neighbor[k] @@ -200,12 +199,13 @@ def atomic_symbol(self): @property def isotope(self): - isotope = self.isotopic_mass - if not isotope: - isotope = None - elif isotope > 9000: # OVER NINE THOUSANDS! - isotope += common_isotopes[self.atomic_symbol] - 10000 - return isotope + if 0 < self.isotopic_mass < 9000: # OVER NINE THOUSANDS! 
+ return self.isotopic_mass + + @property + def delta_isotope(self): + if self.isotope > 9000: + return self.isotope - 10_000 @property def is_radical(self): diff --git a/chython/periodictable/__init__.py b/chython/periodictable/__init__.py index 5f272d31..d494564e 100644 --- a/chython/periodictable/__init__.py +++ b/chython/periodictable/__init__.py @@ -39,6 +39,7 @@ from .groupXVII import * from .groupXVIII import * + modules = {v.__name__: v for k, v in globals().items() if k.startswith('group') and k != 'groups'} elements = {k: v for k, v in globals().items() if isinstance(v, ABCMeta) and k != 'Element' and issubclass(v, Element)} @@ -48,12 +49,21 @@ __all__.extend(elements) -for _class in (DynamicElement, QueryElement): - for k, v in elements.items(): - name = f'{_class.__name__[:-7]}{k}' - globals()[name] = cls = type(name, - (_class, *v.__mro__[-3:-1]), - {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number}) - setattr(modules[v.__module__], name, cls) - modules[v.__module__].__all__.append(name) - __all__.append(name) +for k, v in elements.items(): + name = f'Dynamic{k}' + globals()[name] = cls = type(name, (DynamicElement,), + {'__module__': v.__module__, '__slots__': (), + 'atomic_number': v.atomic_number}) + setattr(modules[v.__module__], name, cls) + modules[v.__module__].__all__.append(name) + __all__.append(name) + +for k, v in elements.items(): + name = f'Query{k}' + globals()[name] = cls = type(name, (QueryElement,), + {'__module__': v.__module__, '__slots__': (), + 'atomic_number': v.atomic_number, + 'mdl_isotope': v.mdl_isotope}) + setattr(modules[v.__module__], name, cls) + modules[v.__module__].__all__.append(name) + __all__.append(name) diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 6b89b226..9014e064 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -32,12 +32,16 @@ class Element(ABC): def __init__(self, isotope: 
Optional[int] = None, *, charge: int = 0, is_radical: bool = False, x: float = 0., y: float = 0., implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, - parsed_mapping: Optional[int] = None): + parsed_mapping: Optional[int] = None, delta_isotope: Optional[int] = None): """ Element object with specified isotope :param isotope: Isotope number of element """ + if delta_isotope is not None: + assert isotope is None, 'isotope absolute value and delta value provided' + isotope = self.mdl_isotope + delta_isotope + self.isotope = isotope self.charge = charge self.is_radical = is_radical @@ -107,6 +111,13 @@ def atomic_radius(self) -> float: Valence radius of atom """ + @property + @abstractmethod + def mdl_isotope(self) -> int: + """ + MDL MOL common isotope + """ + @property def charge(self) -> int: """ diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index a7c10f55..a0505f20 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -48,6 +48,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 0.53 + @property + def mdl_isotope(self): + return 1 + class Li(Element, PeriodII, GroupI): __slots__ = () @@ -76,6 +80,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 167 + @property + def mdl_isotope(self): + return 7 + class Na(Element, PeriodIII, GroupI): __slots__ = () @@ -104,6 +112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.9 + @property + def mdl_isotope(self): + return 23 + class K(Element, PeriodIV, GroupI): __slots__ = () @@ -132,6 +144,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.43 + @property + def mdl_isotope(self): + return 39 + class Rb(Element, PeriodV, GroupI): __slots__ = () @@ -160,6 +176,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.65 + @property + def mdl_isotope(self): + return 85 + class Cs(Element, PeriodVI, GroupI): __slots__ = () @@ -188,6 +208,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.98 + @property + def mdl_isotope(self): + return 133 + class Fr(Element, PeriodVII, GroupI): __slots__ = () @@ -216,5 +240,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.98 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 223 + __all__ = ['H', 'Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'] diff --git a/chython/periodictable/groupII.py b/chython/periodictable/groupII.py index bae2cf65..8b6337d0 100644 --- a/chython/periodictable/groupII.py +++ b/chython/periodictable/groupII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.12 + @property + def mdl_isotope(self): + return 9 + class Mg(Element, PeriodIII, GroupII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 24 + class Ca(Element, PeriodIV, GroupII): __slots__ = () @@ -110,6 +118,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.94 + @property + def mdl_isotope(self): + return 40 + class Sr(Element, PeriodV, GroupII): __slots__ = () @@ -138,6 +150,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.19 + @property + def mdl_isotope(self): + return 88 + class Ba(Element, PeriodVI, GroupII): __slots__ = () @@ -167,6 +183,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.53 + @property + def mdl_isotope(self): + return 137 + class Ra(Element, PeriodVII, GroupII): __slots__ = () @@ -195,5 +215,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.53 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 226 + __all__ = ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'] diff --git a/chython/periodictable/groupIII.py b/chython/periodictable/groupIII.py index a2683f8d..ca11c5f1 100644 --- a/chython/periodictable/groupIII.py +++ 
b/chython/periodictable/groupIII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.84 + @property + def mdl_isotope(self): + return 45 + class Y(Element, PeriodV, GroupIII): __slots__ = () @@ -77,6 +81,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 + @property + def mdl_isotope(self): + return 89 + class La(Element, PeriodVI, GroupIII): __slots__ = () @@ -105,6 +113,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 139 + class Ce(Element, PeriodVI, GroupIII): __slots__ = () @@ -137,6 +149,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 140 + class Pr(Element, PeriodVI, GroupIII): __slots__ = () @@ -167,6 +183,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.47 + @property + def mdl_isotope(self): + return 141 + class Nd(Element, PeriodVI, GroupIII): __slots__ = () @@ -208,6 +228,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.06 + @property + def mdl_isotope(self): + return 144 + class Pm(Element, PeriodVI, GroupIII): __slots__ = () @@ -236,6 +260,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.05 + @property + def mdl_isotope(self): + return 145 + class Sm(Element, PeriodVI, GroupIII): __slots__ = () @@ -277,6 +305,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.38 + @property + def mdl_isotope(self): + return 150 + class Eu(Element, PeriodVI, GroupIII): __slots__ = () @@ -316,6 +348,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.31 + @property + def mdl_isotope(self): + return 152 + class Gd(Element, PeriodVI, GroupIII): __slots__ = () @@ -345,6 +381,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.33 + @property 
+ def mdl_isotope(self): + return 157 + class Tb(Element, PeriodVI, GroupIII): __slots__ = () @@ -375,6 +415,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.25 + @property + def mdl_isotope(self): + return 159 + class Dy(Element, PeriodVI, GroupIII): __slots__ = () @@ -406,6 +450,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.28 + @property + def mdl_isotope(self): + return 163 + class Ho(Element, PeriodVI, GroupIII): __slots__ = () @@ -445,6 +493,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.26 + @property + def mdl_isotope(self): + return 165 + class Er(Element, PeriodVI, GroupIII): __slots__ = () @@ -473,6 +525,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.26 + @property + def mdl_isotope(self): + return 167 + class Tm(Element, PeriodVI, GroupIII): __slots__ = () @@ -512,6 +568,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.22 + @property + def mdl_isotope(self): + return 169 + class Yb(Element, PeriodVI, GroupIII): __slots__ = () @@ -552,6 +612,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.22 + @property + def mdl_isotope(self): + return 173 + class Lu(Element, PeriodVI, GroupIII): __slots__ = () @@ -580,6 +644,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 + @property + def mdl_isotope(self): + return 175 + class Ac(Element, PeriodVII, GroupIII): __slots__ = () @@ -608,6 +676,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 227 + class Th(Element, PeriodVII, GroupIII): __slots__ = () @@ -641,6 +713,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 232 + class Pa(Element, PeriodVII, GroupIII): __slots__ = () @@ -671,6 +747,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 231 + class U(Element, PeriodVII, GroupIII): __slots__ = () @@ -700,6 +780,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 238 + class Np(Element, PeriodVII, GroupIII): __slots__ = () @@ -730,6 +814,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 237 + class Pu(Element, PeriodVII, GroupIII): __slots__ = () @@ -768,6 +856,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 244 + class Am(Element, PeriodVII, GroupIII): __slots__ = () @@ -796,6 +888,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 243 + class Cm(Element, PeriodVII, GroupIII): __slots__ = () @@ -824,6 +920,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 247 + class Bk(Element, PeriodVII, GroupIII): __slots__ = () @@ -852,6 +952,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 247 + class Cf(Element, PeriodVII, GroupIII): __slots__ = () @@ -880,6 +984,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 251 + class Es(Element, PeriodVII, GroupIII): __slots__ = () @@ -908,6 +1016,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 252 + class Fm(Element, PeriodVII, GroupIII): __slots__ = () @@ -936,6 +1048,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 257 + class Md(Element, PeriodVII, GroupIII): __slots__ = () @@ -964,6 +1080,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 258 + class No(Element, PeriodVII, GroupIII): __slots__ = () @@ -992,6 +1112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 259 + class Lr(Element, PeriodVII, GroupIII): __slots__ = () @@ -1020,6 +1144,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 260 + __all__ = ['Sc', 'Y', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', diff --git a/chython/periodictable/groupIV.py b/chython/periodictable/groupIV.py index c80e1482..70c626b8 100644 --- a/chython/periodictable/groupIV.py +++ b/chython/periodictable/groupIV.py @@ -80,6 +80,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.76 + @property + def mdl_isotope(self): + return 48 + class Zr(Element, PeriodV, GroupIV): __slots__ = () @@ -127,6 +131,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.06 + @property + def mdl_isotope(self): + return 91 + class Hf(Element, PeriodVI, GroupIV): __slots__ = () @@ -162,6 +170,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.08 + @property + def mdl_isotope(self): + return 
178 + class Rf(Element, PeriodVII, GroupIV): __slots__ = () @@ -190,5 +202,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.08 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 261 + __all__ = ['Ti', 'Zr', 'Hf', 'Rf'] diff --git a/chython/periodictable/groupIX.py b/chython/periodictable/groupIX.py index 97608fd9..b1fe8055 100644 --- a/chython/periodictable/groupIX.py +++ b/chython/periodictable/groupIX.py @@ -71,6 +71,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.52 + @property + def mdl_isotope(self): + return 59 + class Rh(Element, PeriodV, GroupIX): __slots__ = () @@ -108,6 +112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.73 + @property + def mdl_isotope(self): + return 103 + class Ir(Element, PeriodVI, GroupIX): __slots__ = () @@ -148,6 +156,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.8 + @property + def mdl_isotope(self): + return 192 + class Mt(Element, PeriodVII, GroupIX): __slots__ = () @@ -176,5 +188,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.8 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 278 + __all__ = ['Co', 'Rh', 'Ir', 'Mt'] diff --git a/chython/periodictable/groupV.py b/chython/periodictable/groupV.py index 66036c63..67e56d7d 100644 --- a/chython/periodictable/groupV.py +++ b/chython/periodictable/groupV.py @@ -68,6 +68,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 + @property + def mdl_isotope(self): + return 51 + class Nb(Element, PeriodV, GroupV): __slots__ = () @@ -111,6 +115,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.98 + @property + def mdl_isotope(self): + return 93 + class Ta(Element, PeriodVI, GroupV): __slots__ = () @@ -144,6 +152,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.0 + @property + def mdl_isotope(self): + return 181 + 
class Db(Element, PeriodVII, GroupV): __slots__ = () @@ -172,5 +184,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.0 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['V', 'Nb', 'Ta', 'Db'] diff --git a/chython/periodictable/groupVI.py b/chython/periodictable/groupVI.py index 03b76191..0511d734 100644 --- a/chython/periodictable/groupVI.py +++ b/chython/periodictable/groupVI.py @@ -59,6 +59,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.66 + @property + def mdl_isotope(self): + return 52 + class Mo(Element, PeriodV, GroupVI): __slots__ = () @@ -102,6 +106,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.90 + @property + def mdl_isotope(self): + return 96 + class W(Element, PeriodVI, GroupVI): __slots__ = () @@ -135,6 +143,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.93 + @property + def mdl_isotope(self): + return 184 + class Sg(Element, PeriodVII, GroupVI): __slots__ = () @@ -163,5 +175,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.93 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 269 + __all__ = ['Cr', 'Mo', 'W', 'Sg'] diff --git a/chython/periodictable/groupVII.py b/chython/periodictable/groupVII.py index 3fceee40..f754b97e 100644 --- a/chython/periodictable/groupVII.py +++ b/chython/periodictable/groupVII.py @@ -57,6 +57,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.61 + @property + def mdl_isotope(self): + return 55 + class Tc(Element, PeriodV, GroupVII): __slots__ = () @@ -86,6 +90,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.83 + @property + def mdl_isotope(self): + return 98 + class Re(Element, PeriodVI, GroupVII): __slots__ = () @@ -114,6 +122,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.88 + @property + def mdl_isotope(self): + return 186 
+ class Bh(Element, PeriodVII, GroupVII): __slots__ = () @@ -142,5 +154,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.88 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['Mn', 'Tc', 'Re', 'Bh'] diff --git a/chython/periodictable/groupVIII.py b/chython/periodictable/groupVIII.py index ea510d60..15056c3f 100644 --- a/chython/periodictable/groupVIII.py +++ b/chython/periodictable/groupVIII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 56 + class Ru(Element, PeriodV, GroupVIII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.78 + @property + def mdl_isotope(self): + return 101 + class Os(Element, PeriodVI, GroupVIII): __slots__ = () @@ -113,6 +121,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.85 + @property + def mdl_isotope(self): + return 190 + class Hs(Element, PeriodVII, GroupVIII): __slots__ = () @@ -141,5 +153,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.85 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['Fe', 'Ru', 'Os', 'Hs'] diff --git a/chython/periodictable/groupX.py b/chython/periodictable/groupX.py index 0ca6aa05..8c8b2c08 100644 --- a/chython/periodictable/groupX.py +++ b/chython/periodictable/groupX.py @@ -52,6 +52,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.49 + @property + def mdl_isotope(self): + return 59 + class Pd(Element, PeriodV, GroupX): __slots__ = () @@ -85,6 +89,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.69 + @property + def mdl_isotope(self): + return 106 + class Pt(Element, PeriodVI, GroupX): __slots__ = () @@ -118,6 +126,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.77 + @property + def mdl_isotope(self): + 
return 195 + class Ds(Element, PeriodVII, GroupX): __slots__ = () @@ -146,5 +158,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.77 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 281 + __all__ = ['Ni', 'Pd', 'Pt', 'Ds'] diff --git a/chython/periodictable/groupXI.py b/chython/periodictable/groupXI.py index 96be94af..1c80d3d5 100644 --- a/chython/periodictable/groupXI.py +++ b/chython/periodictable/groupXI.py @@ -52,6 +52,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 64 + class Ag(Element, PeriodV, GroupXI): __slots__ = () @@ -84,6 +88,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.65 + @property + def mdl_isotope(self): + return 108 + class Au(Element, PeriodVI, GroupXI): __slots__ = () @@ -116,6 +124,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.74 + @property + def mdl_isotope(self): + return 197 + class Rg(Element, PeriodVII, GroupXI): __slots__ = () @@ -144,5 +156,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.74 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 281 + __all__ = ['Cu', 'Ag', 'Au', 'Rg'] diff --git a/chython/periodictable/groupXII.py b/chython/periodictable/groupXII.py index 17a3e8cf..2b59c90b 100644 --- a/chython/periodictable/groupXII.py +++ b/chython/periodictable/groupXII.py @@ -50,6 +50,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.42 + @property + def mdl_isotope(self): + return 65 + class Cd(Element, PeriodV, GroupXII): __slots__ = () @@ -80,6 +84,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.61 + @property + def mdl_isotope(self): + return 112 + class Hg(Element, PeriodVI, GroupXII): __slots__ = () @@ -110,6 +118,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 + @property + def 
mdl_isotope(self): + return 201 + class Cn(Element, PeriodVII, GroupXII): __slots__ = () @@ -138,5 +150,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 285 + __all__ = ['Zn', 'Cd', 'Hg', 'Cn'] diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index c0d3f507..ef5243a6 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -51,6 +51,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 11 + class Al(Element, PeriodIII, GroupXIII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.18 + @property + def mdl_isotope(self): + return 27 + class Ga(Element, PeriodIV, GroupXIII): __slots__ = () @@ -115,6 +123,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.36 + @property + def mdl_isotope(self): + return 70 + class In(Element, PeriodV, GroupXIII): __slots__ = () @@ -145,6 +157,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 115 + class Tl(Element, PeriodVI, GroupXIII): __slots__ = () @@ -175,6 +191,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 204 + class Nh(Element, PeriodVII, GroupXIII): __slots__ = () @@ -203,5 +223,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 278 + __all__ = ['B', 'Al', 'Ga', 'In', 'Tl', 'Nh'] diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index 0a18f705..bd94ad60 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -50,6 +50,10 @@ def _valences_exceptions(self): def 
atomic_radius(self): return .67 + @property + def mdl_isotope(self): + return 12 + class Si(Element, PeriodIII, GroupXIV): __slots__ = () @@ -78,6 +82,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.11 + @property + def mdl_isotope(self): + return 28 + class Ge(Element, PeriodIV, GroupXIV): __slots__ = () @@ -106,6 +114,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.25 + @property + def mdl_isotope(self): + return 73 + class Sn(Element, PeriodV, GroupXIV): __slots__ = () @@ -144,6 +156,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 119 + class Pb(Element, PeriodVI, GroupXIV): __slots__ = () @@ -182,6 +198,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.54 + @property + def mdl_isotope(self): + return 207 + class Fl(Element, PeriodVII, GroupXIV): __slots__ = () @@ -210,5 +230,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.54 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 289 + __all__ = ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'] diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 218aeecc..700efe89 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -51,6 +51,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .56 + @property + def mdl_isotope(self): + return 14 + class P(Element, PeriodIII, GroupXV): __slots__ = () @@ -86,6 +90,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .98 + @property + def mdl_isotope(self): + return 31 + class As(Element, PeriodIV, GroupXV): __slots__ = () @@ -114,6 +122,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.14 + @property + def mdl_isotope(self): + return 75 + class Sb(Element, PeriodV, GroupXV): __slots__ = () @@ -143,6 +155,10 @@ def _valences_exceptions(self): def 
atomic_radius(self): return 1.33 + @property + def mdl_isotope(self): + return 122 + class Bi(Element, PeriodVI, GroupXV): __slots__ = () @@ -188,6 +204,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.43 + @property + def mdl_isotope(self): + return 209 + class Mc(Element, PeriodVII, GroupXV): __slots__ = () @@ -216,5 +236,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.43 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 289 + __all__ = ['N', 'P', 'As', 'Sb', 'Bi', 'Mc'] diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index 4791eb2a..85f72a23 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -51,6 +51,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .48 + @property + def mdl_isotope(self): + return 16 + class S(Element, PeriodIII, GroupXVI): __slots__ = () @@ -227,6 +231,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 32 + class Se(Element, PeriodIV, GroupXVI): __slots__ = () @@ -286,6 +294,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.03 + @property + def mdl_isotope(self): + return 79 + class Te(Element, PeriodV, GroupXVI): __slots__ = () @@ -336,6 +348,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.23 + @property + def mdl_isotope(self): + return 128 + class Po(Element, PeriodVI, GroupXVI): __slots__ = () @@ -369,6 +385,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.35 + @property + def mdl_isotope(self): + return 209 + class Lv(Element, PeriodVII, GroupXVI): __slots__ = () @@ -397,5 +417,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.35 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 293 + __all__ = ['O', 'S', 'Se', 'Te', 'Po', 'Lv'] diff --git 
a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index da6ce4c0..3eecfc17 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -50,6 +50,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .42 + @property + def mdl_isotope(self): + return 19 + class Cl(Element, PeriodIII, GroupXVII): __slots__ = () @@ -89,6 +93,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .79 + @property + def mdl_isotope(self): + return 35 + class Br(Element, PeriodIV, GroupXVII): __slots__ = () @@ -135,6 +143,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 0.94 + @property + def mdl_isotope(self): + return 80 + class I(Element, PeriodV, GroupXVII): __slots__ = () @@ -203,6 +215,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.15 + @property + def mdl_isotope(self): + return 127 + class At(Element, PeriodVI, GroupXVII): __slots__ = () @@ -232,6 +248,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.27 + @property + def mdl_isotope(self): + return 210 + class Ts(Element, PeriodVII, GroupXVII): __slots__ = () @@ -260,5 +280,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.27 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 297 + __all__ = ['F', 'Cl', 'Br', 'I', 'At', 'Ts'] diff --git a/chython/periodictable/groupXVIII.py b/chython/periodictable/groupXVIII.py index 849a893c..b8137593 100644 --- a/chython/periodictable/groupXVIII.py +++ b/chython/periodictable/groupXVIII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .31 + @property + def mdl_isotope(self): + return 4 + class Ne(Element, PeriodII, GroupXVIII): __slots__ = () @@ -77,6 +81,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .38 + @property + def mdl_isotope(self): + return 20 + class Ar(Element, PeriodIII, GroupXVIII): __slots__ = () @@ 
-105,6 +113,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .71 + @property + def mdl_isotope(self): + return 40 + class Kr(Element, PeriodIV, GroupXVIII): __slots__ = () @@ -133,6 +145,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 84 + class Xe(Element, PeriodV, GroupXVIII): __slots__ = () @@ -172,6 +188,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.08 + @property + def mdl_isotope(self): + return 131 + class Rn(Element, PeriodVI, GroupXVIII): __slots__ = () @@ -200,6 +220,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.2 + @property + def mdl_isotope(self): + return 222 + class Og(Element, PeriodVII, GroupXVIII): __slots__ = () @@ -228,5 +252,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.2 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 294 + __all__ = ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', 'Og'] From b0921dbb1ee7ebce21a9609add59c211c57321e2 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 9 Nov 2024 15:00:50 +0100 Subject: [PATCH 13/68] cache invalidation fixed for kekule and thiele --- chython/algorithms/aromatics/kekule.py | 3 +++ chython/algorithms/aromatics/thiele.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 5a7cc494..2452d320 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -52,6 +52,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool for n in atoms: self.calc_implicit(n) self.flush_cache() + self.calc_labels() return True return fixed @@ -70,6 +71,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): atoms.add(m) for n in atoms: copy.calc_implicit(n) + copy.calc_labels() yield copy def __fix_rings(self: 'MoleculeContainer'): @@ -92,6 +94,7 @@ 
def __fix_rings(self: 'MoleculeContainer'): bonds[n][m]._order = b if seen: self.flush_cache() + self.calc_labels() return True return False diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 0b2ce586..9c791ddc 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -164,6 +164,8 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: bonds[n][m]._order = o if not acceptors: break + self.flush_cache() + self.calc_labels() if double_bonded: # delete quinones for n in double_bonded: @@ -214,6 +216,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: bonds[n][m]._order = 4 self.flush_cache() + self.calc_labels() for ring in freaks: # aromatize rule based for q in freak_rules: if next(q.get_mapping(self, searching_scope=ring, automorphism_filter=False), None): @@ -224,6 +227,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: break if freaks: self.flush_cache() # flush again + self.calc_labels() self.fix_stereo() # check if any stereo centers vanished. return True From 3d082ef906c706a6425b96aa145a078786d602a3 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 10:49:37 +0100 Subject: [PATCH 14/68] api changes. all isomorphism labels now maintained --- chython/algorithms/rings.py | 46 ++++++++------------------- chython/containers/graph.py | 25 +++++++++++++-- chython/containers/molecule.py | 26 ++++++++------- chython/periodictable/base/element.py | 4 +-- 4 files changed, 52 insertions(+), 49 deletions(-) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 0b50b2a4..d2cecf1d 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from CachedMethods import cached_args_method from collections import defaultdict, deque from functools import cached_property from itertools import combinations @@ -33,7 +32,7 @@ class Rings: __slots__ = () @cached_property - def sssr(self) -> Tuple[Tuple[int, ...], ...]: + def sssr(self) -> List[Tuple[int, ...]]: """ Smallest Set of Smallest Rings. Special bonds ignored. @@ -47,10 +46,10 @@ def sssr(self) -> Tuple[Tuple[int, ...], ...]: """ if self.rings_count: return _sssr(self.not_special_connectivity, self.rings_count) - return () + return [] @cached_property - def atoms_rings(self) -> Dict[int, Tuple[Tuple[int, ...]]]: + def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: """ Dict of atoms rings which contains it. """ @@ -58,28 +57,17 @@ def atoms_rings(self) -> Dict[int, Tuple[Tuple[int, ...]]]: for r in self.sssr: for n in r: rings[n].append(r) - return {n: tuple(rs) for n, rs in rings.items()} + return dict(rings) @cached_property - def atoms_rings_sizes(self) -> Dict[int, Tuple[int, ...]]: + def atoms_rings_sizes(self) -> Dict[int, Set[int]]: """ Sizes of rings containing atom. """ - return {n: tuple(len(r) for r in rs) for n, rs in self.atoms_rings.items()} - - @cached_args_method - def is_ring_bond(self: 'Graph', n: int, m: int, /) -> bool: - """ - Check is bond in any ring. - """ - self.bond(n, m) # check if bond exists - try: - return not set(self.atoms_rings[n]).isdisjoint(self.atoms_rings[m]) - except KeyError: - return False + return {n: {len(r) for r in rs} for n, rs in self.atoms_rings.items()} @cached_property - def ring_atoms(self): + def ring_atoms(self) -> Set[int]: """ Atoms in rings. Not SSSR based fast algorithm. 
""" @@ -136,13 +124,11 @@ def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: return bonds @cached_property - def connected_components(self: 'Graph') -> Tuple[Tuple[int, ...], ...]: + def connected_components(self: 'Graph') -> List[Set[int]]: """ Isolated components of single graph. E.g. salts as ion pair. """ - if not self._atoms: - return () - return tuple(tuple(x) for x in self._connected_components) + return _connected_components(self._bonds) @property def connected_components_count(self) -> int: @@ -158,12 +144,8 @@ def skin_graph(self: 'Graph') -> Dict[int, Set[int]]: """ return _skin_graph(self._bonds) - @cached_property - def _connected_components(self: 'Graph') -> List[Set[int]]: - return _connected_components(self._bonds) - -def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> Tuple[Tuple[int, ...], ...]: +def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> List[Tuple[int, ...]]: """ Smallest Set of Smallest Rings of any adjacency matrix. Number of rings required. 
@@ -529,7 +511,7 @@ def _connected_rings(rings, seen_rings): def _rings_filter(rings, n_sssr): c = next(rings) if n_sssr == 1: - return c, + return [c] seen_rings = {c} sssr_atoms = set(c) @@ -545,7 +527,7 @@ def _rings_filter(rings, n_sssr): sssr_atoms.update(c) sssr.append(c) if len(sssr) == n_sssr: - return tuple(sssr) + return sssr # now we have set of plug rings (cuban fullerene), besiege rings and condensed trash seen_rings = {c: _ring_adjacency(c) for c in seen_rings} # prepare adjacency @@ -558,7 +540,7 @@ def _rings_filter(rings, n_sssr): condensed_rings = _connected_rings(condensed_rings, seen_rings) sssr.append(c) if len(sssr) == n_sssr: - return tuple(sorted(sssr, key=len)) + return sorted(sssr, key=len) raise ImplementationError('SSSR count not reached') diff --git a/chython/containers/graph.py b/chython/containers/graph.py index fe3dc720..7fa5dead 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -101,7 +101,7 @@ def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: self._atoms[n] = atom self._bonds[n] = {} - self.flush_cache() + self.flush_cache(keep_sssr=True) return n @abstractmethod @@ -169,8 +169,27 @@ def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): u._bonds.update(other._bonds) return u - def flush_cache(self): - self.__dict__.clear() + def flush_cache(self, *, keep_sssr=False, keep_components=False): + backup = {} + if keep_sssr: + # good to keep if no new bonds or bonds deletions or bonds to/from any change + if 'sssr' in self.__dict__: + backup['sssr'] = self.sssr + if 'atoms_rings' in self.__dict__: + backup['atoms_rings'] = self.atoms_rings + if 'atoms_rings_sizes' in self.__dict__: + backup['atoms_rings_sizes'] = self.atoms_rings_sizes + if 'ring_atoms' in self.__dict__: + backup['ring_atoms'] = self.ring_atoms + if 'not_special_connectivity' in self.__dict__: + backup['not_special_connectivity'] = self.not_special_connectivity + if 'rings_count' in self.__dict__: + 
backup['rings_count'] = self.rings_count + if keep_components: + # good to keep if no new bonds or bonds deletions + if 'connected_components' in self.__dict__: + backup['connected_components'] = self.connected_components + self.__dict__ = backup def __copy__(self): return self.copy() diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index c96fb713..be079c02 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -169,7 +169,7 @@ def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=Fals else: self._changed.add(n) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() return n def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): @@ -192,7 +192,7 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): self._changed.add(n) self._changed.add(m) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() self.fix_stereo() def delete_atom(self, n: int, *, _skip_calculation=False): @@ -213,7 +213,7 @@ def delete_atom(self, n: int, *, _skip_calculation=False): else: self._changed.add(m) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() self.fix_stereo() def delete_bond(self, n: int, m: int, *, _skip_calculation=False): @@ -232,7 +232,7 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): self._changed.add(n) self._changed.add(m) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() self.fix_stereo() def copy(self) -> 'MoleculeContainer': @@ -321,7 +321,7 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul sbn[m] = sb[m][n] elif m in atoms: sbn[m] = bond.copy(stereo=True) - sub.fix_labels(recalculate_hydrogens=recalculate_hydrogens) + sub.fix_structure(recalculate_hydrogens=recalculate_hydrogens) sub.fix_stereo() return sub @@ -693,22 +693,21 @@ 
def _augmented_substructure(self, atoms: Iterable[int], deep: int): nodes.append(n) return nodes - def fix_labels(self, recalculate_hydrogens=True): + def fix_structure(self, recalculate_hydrogens=True): """ - Fix molecule internal represenation + Fix molecule internal representation """ - if not self._changed: - return - self.calc_labels() # refresh all labels if recalculate_hydrogens: - for n in self._changed: + for n in (self._changed or self._atoms): self.calc_implicit(n) # fix Hs count self._changed = None def calc_labels(self): atoms = self._atoms + atoms_rings_sizes = self.atoms_rings_sizes # expensive: sssr based + for n, m_bond in self._bonds.items(): neighbors = 0 heteroatoms = 0 @@ -741,6 +740,9 @@ def calc_labels(self): atom._hybridization = hybridization atom._explicit_hydrogens = explicit_hydrogens + atom._in_ring = n in atoms_rings_sizes + atom._ring_sizes = atoms_rings_sizes.get(n) or set() + def calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules @@ -868,7 +870,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._name = backup._name self.flush_cache() else: # update internal state - self.fix_labels() + self.fix_structure() self.fix_stereo() self._backup = None # drop backup diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 9014e064..88ca210e 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -244,7 +244,7 @@ def hybridization(self): return self._hybridization @property - def ring_sizes(self) -> Tuple[int, ...]: + def ring_sizes(self) -> Set[int]: """ Atom rings sizes. 
""" @@ -274,7 +274,7 @@ def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': copy._neighbors = self.neighbors copy._heteroatoms = self.heteroatoms copy._hybridization = self.hybridization - copy._ring_sizes = self.ring_sizes + copy._ring_sizes = self.ring_sizes.copy() copy._in_ring = self.in_ring else: if hydrogens: From 269da1a42ea0dc98ec6c847ffb5b97658c54ceaa Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 10:53:03 +0100 Subject: [PATCH 15/68] fixed aromaticity handling --- chython/algorithms/aromatics/kekule.py | 8 ++++++-- chython/algorithms/aromatics/thiele.py | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 2452d320..f7d90918 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -51,7 +51,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool atoms.add(m) for n in atoms: self.calc_implicit(n) - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.calc_labels() return True return fixed @@ -78,6 +78,7 @@ def __fix_rings(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds seen = set() + keep = True for q, af, bf, mm in rules: for mapping in q.get_mapping(self, automorphism_filter=False): match = set(mapping.values()) @@ -92,8 +93,11 @@ def __fix_rings(self: 'MoleculeContainer'): n = mapping[n] m = mapping[m] bonds[n][m]._order = b + if b == 8: + # flush sssr and components cache + keep = False if seen: - self.flush_cache() + self.flush_cache(keep_sssr=keep, keep_components=keep) self.calc_labels() return True return False diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 9c791ddc..f236e887 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -164,7 +164,7 @@ def thiele(self: 'MoleculeContainer', *, 
fix_tautomers=True) -> bool: bonds[n][m]._order = o if not acceptors: break - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.calc_labels() if double_bonded: # delete quinones @@ -215,7 +215,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: for n, m in zip(ring, ring[1:]): bonds[n][m]._order = 4 - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.calc_labels() for ring in freaks: # aromatize rule based for q in freak_rules: @@ -226,7 +226,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: bonds[n][m]._order = 4 break if freaks: - self.flush_cache() # flush again + self.flush_cache(keep_sssr=True, keep_components=True) # flush again self.calc_labels() self.fix_stereo() # check if any stereo centers vanished. return True From f7d8e899bd85e194cdba63ca381756d5d290d6ac Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 12:54:45 +0100 Subject: [PATCH 16/68] refactored standardization --- chython/algorithms/morgan.py | 11 +- chython/algorithms/standardize/molecule.py | 165 ++++++++----------- chython/algorithms/standardize/resonance.py | 61 ++++--- chython/algorithms/standardize/salts.py | 37 ++--- chython/algorithms/standardize/saturation.py | 30 ++-- 5 files changed, 130 insertions(+), 174 deletions(-) diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index 659c50c8..36086ada 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -40,13 +40,12 @@ def atoms_order(self: 'Graph') -> Dict[int, int]: :return: dict of atom-order pairs """ - atoms = self._atoms - if not atoms: # for empty containers + if not self._atoms: # for empty containers return {} - elif len(atoms) == 1: # optimize single atom containers - return dict.fromkeys(atoms, 1) + elif len(self._atoms) == 1: # optimize single atom containers + return dict.fromkeys(self._atoms, 1) ring = self.ring_atoms - return _morgan({n: hash((hash(a), n in ring)) for n, a in atoms.items()}, self.int_adjacency) + return _morgan({n: hash((hash(a), n in ring)) for n, a in self._atoms.items()}, self.int_adjacency) @cached_property def int_adjacency(self: 'Graph') -> Dict[int, Dict[int, int]]: diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index c9fb0893..049671a2 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -50,7 +50,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F h, changed = self.implicify_hydrogens(_fix_stereo=False, logging=True) if fix_tautomers and (logging or keep_kekule): # thiele can change tautomeric form - hgs = self._hydrogens.copy() + hgs = {n: a.implicit_hydrogens for n, a in self._atoms.items()} if keep_kekule: # save bond orders bonds = [(b, b.order) for _, _, b in self.bonds()] @@ -65,8 +65,9 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F self.kekule() # we need to do full kekule again else: for b, o in bonds: # noqa - b._Bond__order = o # noqa - self.flush_cache() + b._order = o + self.flush_cache() + self.calc_labels() if logging: if k: @@ -75,13 +76,12 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F s.append((tuple(changed), -1, 'implicified')) if t: s.append(((), -1, 'aromatized')) - if fix_tautomers and hgs != self._hydrogens: - 
s.append((tuple(x for x, y in self._hydrogens.items() if hgs[x] != y), - -1, 'aromatic tautomer found')) + if fix_tautomers and (x := tuple(n for n, a in self._atoms.items() if hgs[n] != a.implicit_hydrogens)): + s.append((x, -1, 'aromatic tautomer found')) if c: s.append((tuple(c), -1, 'recharged')) if keep_kekule and t: - if c or fix_tautomers and hgs != self._hydrogens: + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self._atoms.items()): s.append(((), -1, 'kekulized again')) else: s.append(((), -1, 'kekule form restored')) @@ -118,16 +118,14 @@ def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=Fals log.extend(l) fixed.update(f) - if b := fixed.intersection(n for n, h in self._hydrogens.items() if h is None): + if b := fixed.intersection(n for n, a in self._atoms.items() if a.implicit_hydrogens is None): if ignore: log.append((tuple(b), -1, 'standardization failed')) else: raise ImplementationError(f'standardization leads to invalid valences: {b}') - if fixed: - self.flush_cache() - if _fix_stereo: - self.fix_stereo() + if fixed and _fix_stereo: + self.fix_stereo() if logging: if fixed: @@ -146,10 +144,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol changed: List[int] = [] bonds = self._bonds nsc = self.not_special_connectivity - hydrogens = self._hydrogens - charges = self._charges atoms = self._atoms - hybridization = self.hybridization if prepare_molecule: self.thiele() @@ -165,25 +160,25 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol # if not 2 neighbors and 1 hydrogen or 3 neighbors within 1st and second atoms - break atom_1, atom_2 = mapping[1], mapping[2] if len(bonds[atom_1]) == 2: - if not hydrogens[atom_1]: + if not atoms[atom_1].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_1].values()): continue if len(bonds[atom_2]) == 2: - if not hydrogens[atom_2]: + if not atoms[atom_2].implicit_hydrogens: continue elif 
all(x == 4 for x in bonds[atom_2].values()): continue if fix: atom_3 = mapping[3] - charges[atom_3] = 0 + atoms[atom_3]._charge = 0 changed.append(atom_3) else: - charges[atom_1] = 0 + atoms[atom_1]._charge = 0 changed.append(atom_1) - charges[atom_2] = 1 + atoms[atom_2]._charge = 1 changed.append(atom_2) # add atoms to changed # morgan @@ -196,36 +191,36 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol seen.update(match) atom_1, atom_2 = mapping[1], mapping[2] if len(bonds[atom_1]) == 2: - if not hydrogens[atom_1]: + if not atoms[atom_1].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_1].values()): continue if len(bonds[atom_2]) == 2: - if not hydrogens[atom_2]: + if not atoms[atom_2].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_2].values()): continue if fix: atom_3 = mapping[3] - charges[atom_3] = 0 + atoms[atom_3]._charge = 0 changed.append(atom_3) else: # remove charge from 1st N atom - charges[atom_1] = 0 + atoms[atom_1]._charge = 0 pairs.append((atom_1, atom_2, fix)) if pairs: self.__dict__.pop('atoms_order', None) # remove cached morgan for atom_1, atom_2, fix in pairs: if self.atoms_order[atom_1] > self.atoms_order[atom_2]: - charges[atom_2] = 1 + atoms[atom_2]._charge = 1 changed.append(atom_2) if not fix: changed.append(atom_1) else: - charges[atom_1] = 1 + atoms[atom_1]._charge = 1 if fix: changed.append(atom_1) del self.__dict__['atoms_order'] # remove invalid morgan @@ -233,9 +228,9 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol # ferrocene fcr = [] for r in self.sssr: - if len(r) != 5 or not all(hybridization(n) == 4 for n in r): + if len(r) != 5 or not all(atoms[n].hybridization == 4 for n in r): continue - ch = [(n, x) for n in r if (x := charges[n])] + ch = [(n, x) for n in r if (x := atoms[n].charge)] if len(ch) != 1 or ch[0][1] != -1: continue ch = ch[0][0] @@ -243,19 +238,19 @@ def standardize_charges(self: 'MoleculeContainer', *, 
logging=False, prepare_mol (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b.order == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue - charges[ch] = 0 # reset charge for morgan recalculation + atoms[ch]._charge = 0 # reset charge for morgan recalculation fcr.append(ca) changed.append(ch) if fcr: self.__dict__.pop('atoms_order', None) # remove cached morgan for ca in fcr: n = min(ca, key=self.atoms_order.get) - charges[n] = -1 + atoms[n]._charge = -1 changed.append(n) del self.__dict__['atoms_order'] # remove invalid morgan if changed: - self.flush_cache() # clear cache + self.flush_cache(keep_sssr=True, keep_components=True) # clear cache if _fix_stereo: self.fix_stereo() if logging: @@ -284,7 +279,8 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, del bonds[n][m], bonds[m][n] if ab: - self.flush_cache() + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() return len(ab) @@ -299,12 +295,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo :param logging: return list of changed atoms. 
""" atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - plane = self._plane - hydrogens = self._hydrogens - parsed_mapping = self._parsed_mapping explicit = defaultdict(list) for n, atom in atoms.items(): @@ -322,8 +313,6 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo fixed = {} for n, hs in explicit.items(): atom = atoms[n] - charge = charges[n] - is_radical = radicals[n] len_h = len(hs) for i in range(len_h, 0, -1): hi = hs[:i] @@ -335,7 +324,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo explicit_dict[(bond.order, atoms[m].atomic_number)] += 1 try: # aromatic rings don't match any rule - rules = atom.valence_rules(charge, is_radical, explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: break for s, d, h in rules: @@ -349,23 +338,15 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo for n in to_remove: del atoms[n] - del charges[n] - del radicals[n] - del plane[n] - del hydrogens[n] for m in bonds.pop(n): del bonds[m][n] - try: - del parsed_mapping[n] - except KeyError: - pass for n, h in fixed.items(): - hydrogens[n] = h + atoms[n]._implicit_hydrogens = h if to_remove: - self.flush_cache() - self._conformers = [{x: y for x, y in c.items() if x not in to_remove} for c in self._conformers] # noqa + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() @@ -380,26 +361,28 @@ def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_ma :return: number of added atoms """ - hydrogens = self._hydrogens + atoms = self._atoms to_add = [] - for n, h in hydrogens.items(): + for n, a in atoms.items(): try: - to_add.extend([n] * h) + to_add.extend([n] * a.implicit_hydrogens) except TypeError: raise ValenceError(f'atom {n} has valence error') if to_add: log = [] bonds = self._bonds - m = start_map + m = start_map if start_map is not None else 
max(atoms) + 1 for n in to_add: - m = self.add_atom(H(), m) - bonds[n][m] = bonds[m][n] = b = Bond(1) - b._attach_graph(self, n, m) - hydrogens[n] = 0 + atoms[m] = H(implicit_hydrogens=0) + bonds[n][m] = b = Bond(1) + bonds[m] = {n: b} + atoms[n]._implicit_hydrogens = 0 log.append((n, m)) m += 1 + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() if _return_map: @@ -415,35 +398,33 @@ def check_valence(self: 'MoleculeContainer') -> List[int]: :return: list of invalid atoms """ - return [n for n, h in self._hydrogens.items() if h is None] # only invalid atoms have None hydrogens. + # only invalid atoms have None hydrogens. + return [n for n, a in self._atoms.items() if a.implicit_hydrogens is None] def clean_isotopes(self: 'MoleculeContainer') -> bool: """ Clean isotope marks from molecule. Return True if any isotope found. """ - atoms = self._atoms - isotopes = [x for x in atoms.values() if x.isotope] + isotopes = [x for x in self._atoms.values() if x.isotope] if isotopes: for i in isotopes: i._isotope = None - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.fix_stereo() return True return False def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): + atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals - calc_implicit = self.calc_implicit log = [] fixed = set() - flush = False for r, (pattern, atom_fix, bonds_fix, any_atoms, is_tautomer) in enumerate(rules): if not fix_tautomers and is_tautomer: continue + keep_sssr = keep_components = True hs = set() seen = set() for mapping in pattern.get_mapping(self, automorphism_filter=False): @@ -457,53 +438,37 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): for n, (ch, ir) in atom_fix.items(): n = mapping[n] hs.add(n) - charges[n] += ch - if charges[n] > 4: - charges[n] -= ch + a = atoms[n] + a._charge += ch + if a.charge > 4: + a._charge -= ch log.append((tuple(match), r, f'bad 
charge formed. changes omitted: {pattern}')) break # skip changes if ir is not None: - radicals[n] = ir + a._is_radical = ir else: - for n, m, b in bonds_fix: + for n, m, bo in bonds_fix: n = mapping[n] m = mapping[m] hs.add(n) hs.add(m) if m in bonds[n]: - bonds[n][m]._Bond__order = b # noqa - if b == 8: - # expected original molecule don't contain `any` bonds or these bonds not changed - flush = True - else: - if b != 8: - flush = True - bonds[n][m] = bonds[m][n] = b = Bond(b) - b._attach_graph(self, n, m) + b = bonds[n][m] + if b.order == 8 or b == 8: + keep_sssr = False + b._order = bo + else: # new bond + keep_sssr = keep_components = False + bonds[n][m] = bonds[m][n] = Bond(bo) log.append((tuple(match), r, str(pattern))) if not hs: # not matched continue - # flush cache only for changed atoms. - if flush: # neighbors count changed - ngb = self.__dict__['__cached_args_method_neighbors'] - for n in hs: - try: - del ngb[(n,)] - except KeyError: - pass - del self.__dict__['bonds_count'] - flush = False - # need hybridization recalculation - hyb = self.__dict__['__cached_args_method_hybridization'] - for n in hs: - try: - del hyb[(n,)] - except KeyError: # already flushed before - pass + self.flush_cache(keep_sssr=keep_sssr, keep_components=keep_components) + # recalculate isomorphism labels + self.calc_labels() for n in hs: # hydrogens count recalculation - calc_implicit(n) - del self.__dict__['_cython_compiled_structure'] + self.calc_implicit(n) fixed.update(hs) return log, fixed diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 696b977c..31f0a0da 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -35,21 +35,18 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals :param logging: return list of changed atoms. """ atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - calc_implicit = self.calc_implicit entries, exits, rads, constrains, nitrogen_cat, nitrogen_ani, sulfur_cat = self.__entries() hs = set() while len(rads) > 1: n = rads.pop() for path in self.__find_delocalize_path(n, rads, constrains, True): - radicals[n] = False + atoms[n]._is_radical = False hs.add(n) for n, m, b in path: hs.add(m) - bonds[n][m]._Bond__order = b # noqa - radicals[m] = False # noqa + bonds[n][m]._order = b + atoms[m]._is_radical = False # noqa rads.discard(m) break # path found # path not found. atom n keep as is @@ -60,29 +57,31 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals if n in nitrogen_cat and m in nitrogen_ani: continue - c_m = charges[m] - 1 if m in sulfur_cat: # prevent X-[S+]=X >> X=S=X if b != 1: continue + atoms[m]._charge -= 1 else: # check cations end valence. + atoms[m]._charge -= 1 # reduce atom change and check valence try: - atoms[m].valence_rules(c_m, radicals[m], sum(int(y) for x, y in bonds[m].items() if x != l) + b) + atoms[m].valence_rules(sum(int(y) for x, y in bonds[m].items() if x != l) + b) except ValenceError: + atoms[m]._charge += 1 # roll back continue - charges[n] += 1 + # succeed! + atoms[n]._charge += 1 hs.add(n) for n, m, b in path: hs.add(m) - bonds[n][m]._Bond__order = b # noqa - charges[m] = c_m + bonds[n][m]._order = b exits.discard(m) break # path from negative atom to positive atom found. # path not found. 
keep negative atom n as is if hs: for n in hs: - calc_implicit(n) - self.flush_cache() + self.calc_implicit(n) + self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() if logging: @@ -121,13 +120,9 @@ def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, if n not in seen and n in constrains and 1 <= (bo := b.order + diff) <= 3) def __entries(self: 'MoleculeContainer'): - hybridization = self.hybridization - neighbors = self.neighbors - charges = self._charges - radicals = self._radicals - bonds = self._bonds atoms = self._atoms - errors = {n for n, h in self._hydrogens.items() if h is None} + bonds = self._bonds + errors = {n for n, a in atoms.items() if a.implicit_hydrogens is None} transfer = set() entries = set() @@ -140,9 +135,9 @@ def __entries(self: 'MoleculeContainer'): if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52}: # filter non-organic set, halogens and aromatics continue - elif radicals[n]: + elif a.is_radical: rads.add(n) - elif charges[n] == -1: + elif a.charge == -1: if (lb := len(bonds[n])) == 4 and a.atomic_number == 5: # skip boron continue elif lb == 6 and a.atomic_number == 15: # skip [P-]X6 @@ -150,35 +145,37 @@ def __entries(self: 'MoleculeContainer'): if n in errors: # only valid anions accepted continue entries.add(n) - elif charges[n] == 1: + elif a.charge == 1: lb = len(bonds[n]) if a.atomic_number == 7: if lb == 4: # skip ammonia continue - elif lb == 2 and hybridization(n) == 3: # skip Azide + elif lb == 2 and a.hybridization == 3: # skip Azide (n1, b1), (n2, b2) = bonds[n].items() - if b1.order == b2.order == 2 and (charges[n1] == -1 and atoms[n1].atomic_number == 7 or - charges[n2] == -1 and atoms[n2].atomic_number == 7): + an1 = atoms[n1] + an2 = atoms[n2] + if b1.order == b2.order == 2 and (an1.charge == -1 and an1.atomic_number == 7 or + an2.charge == -1 and an2.atomic_number == 7): continue - elif lb == 3 and hybridization(n) == 2: # X=[N+](-X)-X - prevent 
N-N migration + elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) elif a.atomic_number == 15 and lb == 4: # skip [P+]R4 continue elif a.atomic_number == 16: - if lb == 2 and hybridization(n) == 2: # ad-hoc for X-[S+]=X + if lb == 2 and a.hybridization == 2: # ad-hoc for X-[S+]=X sulfur_cat.add(n) - elif lb == 3 and hybridization(n) == 1: # ad-hoc for X-[S+](-X)-X + elif lb == 3 and a.hybridization == 1: # ad-hoc for X-[S+](-X)-X continue exits.add(n) transfer.add(n) if exits or entries: # try to move cation to nitrogen. saturation fixup. for n, a in self._atoms.items(): - if a.atomic_number == 7 and not charges[n]: - if hybridization(n) == 1 and neighbors(n) <= 3: # any amine - potential e-donor + if a.atomic_number == 7 and not a.charge: + if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) nitrogen_cat.add(n) - elif hybridization(n) == 3 and neighbors(n) == 1: # N#X-[X-] >> [N-]=X=X + elif a.hybridization == 3 and a.neighbors == 1: # N#X-[X-] >> [N-]=X=X exits.add(n) nitrogen_ani.add(n) return entries, exits, rads, transfer, nitrogen_cat, nitrogen_ani, sulfur_cat diff --git a/chython/algorithms/standardize/salts.py b/chython/algorithms/standardize/salts.py index 08a34250..d281b593 100644 --- a/chython/algorithms/standardize/salts.py +++ b/chython/algorithms/standardize/salts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -33,16 +33,20 @@ def remove_metals(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Li :param logging: return deleted atoms list. 
""" + atoms = self._atoms bonds = self._bonds metals = [] - for n, a in self._atoms.items(): - if a.atomic_symbol not in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: + for n, a in atoms.items(): + if a.atomic_number in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: metals.append(n) if 0 < len(metals) < len(self): for n in metals: - self.delete_atom(n) + del atoms[n] + del bonds[n] + + self.flush_cache(keep_sssr=True) if logging: return metals return True @@ -64,27 +68,12 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis log.extend(c) if 0 < len(log) < len(self): # prevent singularity atoms = self._atoms - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - plane = self._plane bonds = self._bonds - parsed_mapping = self._parsed_mapping - - self._conformers.clear() # clean conformers. for n in log: del atoms[n] - del charges[n] - del radicals[n] - del hydrogens[n] - del plane[n] del bonds[n] - try: - del parsed_mapping[n] - except KeyError: - pass self.flush_cache() if logging: return log @@ -99,10 +88,10 @@ def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool :param logging: return deleted bonds list. 
""" + atoms = self._atoms bonds = self._bonds - charges = self._charges - metals = [n for n, a in self._atoms.items() if a.atomic_number in + metals = [n for n, a in atoms.items() if a.atomic_number in {3, 4, 11, 12, 19, 20, 37, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102}] if metals: @@ -114,12 +103,12 @@ def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool for n in metals: for m in acceptors & bonds[n].keys(): - if charges[n] == 4: # prevent overcharging + if atoms[n].charge == 4: # prevent overcharging break del bonds[n][m] del bonds[m][n] - charges[n] += 1 - charges[m] -= 1 + atoms[n]._charge += 1 + atoms[m]._charge -= 1 log.append((n, m)) if log: self.flush_cache() diff --git a/chython/algorithms/standardize/saturation.py b/chython/algorithms/standardize/saturation.py index df9de68a..38c5bb1e 100644 --- a/chython/algorithms/standardize/saturation.py +++ b/chython/algorithms/standardize/saturation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -72,13 +72,17 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, raise ValenceError('only single bonded skeleton can be saturated') atoms = self._atoms if not reset_electrons: - expected_radicals_count = any(self._radicals.values()) + expected_radicals_count = sum(a.is_radical for a in atoms.values()) expected_charge = int(self) + if reset_electrons: + charges = {x: None for x in self._atoms} + radicals = {x: None for x in self._atoms} + else: + charges = {n: a.charge for n, a in self._atoms.items()} + radicals = {n: a.is_radical for n, a in self._atoms.items()} sat, adjacency = _find_possible_valences(atoms, neighbors_distances or self._bonds, - {x: None for x in self._atoms} if reset_electrons else self._charges, - {x: None for x in self._atoms} if reset_electrons else self._radicals, - neighbors_distances is not None) + charges, radicals, neighbors_distances is not None) charges = {} # new charge states radicals = {} # new radical states bonds = {n: {} for n in atoms} # new bonds @@ -95,8 +99,7 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, radicals[n] = r for m in env: if m not in seen: - bonds[n][m] = bonds[m][n] = b = Bond(1) - b._attach_graph(self, n, m) + bonds[n][m] = bonds[m][n] = Bond(1) else: unsaturated[n] = [(c, r, h)] else: @@ -142,8 +145,7 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, return False for n, m, b in sb: - bonds[n][m] = bonds[m][n] = b = Bond(b) - b._attach_graph(self, n, m) + bonds[n][m] = bonds[m][n] = Bond(b) for n, c, r in sa: charges[n] = c radicals[n] = r @@ -155,10 +157,14 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, return False # reset molecule self._bonds = bonds - self._radicals = radicals - self._charges = charges - self._hydrogens = {x: 0 for x in atoms} # reset invalid hydrogens counts. 
+ for n, r in radicals.items(): + atoms[n]._is_radical = r + for n, c in charges.items(): + atoms[n]._charge = c + for a in atoms.values(): + a._implicit_hydrogens = 0 # reset invalid hydrogens counts. self.flush_cache() + self.calc_labels() if logging: if not log: # check for errors log.append('Saturated successfully') From 0f46bc23ef72e56d0300c8e7ead355c58ec1d2b1 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 12:57:58 +0100 Subject: [PATCH 17/68] fix --- chython/algorithms/standardize/reaction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 17128417..1cb20f28 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2022 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # Copyright 2021 Timur Gimadiev # Copyright 2024 Philippe Gantzer # This file is part of chython. 
@@ -90,7 +90,7 @@ def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: """ total = False for m in self.molecules(): - if m.thiele(fix_tautomers=fix_tautomers) and not total: + if m.thiele(fix_tautomers=fix_tautomers): total = True if total: self.flush_cache() @@ -105,7 +105,7 @@ def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: """ total = False for m in self.molecules(): - if m.kekule(buffer_size=buffer_size) and not total: + if m.kekule(buffer_size=buffer_size): total = True if total: self.flush_cache() @@ -118,7 +118,7 @@ def clean_isotopes(self: 'ReactionContainer') -> bool: """ flag = False for m in self.molecules(): - if m.clean_isotopes() and not flag: + if m.clean_isotopes(): flag = True if flag: self.flush_cache() From 177373e57e94932bbc0081eb32b10f742715efd4 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 13:17:50 +0100 Subject: [PATCH 18/68] isomorphism fixed --- chython/algorithms/isomorphism.py | 20 ++++++++++---------- chython/containers/bonds.py | 2 -- chython/containers/molecule.py | 4 ++++ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index ce9193bc..eb44cc71 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -106,7 +106,7 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No seen = set() if len(components) == 1: - for candidate in other._connected_components: + for candidate in other.connected_components: if searching_scope: candidate = searching_scope.intersection(candidate) if not candidate: @@ -119,7 +119,7 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No seen.add(atoms) yield mapping else: - for candidates in permutations(other._connected_components, len(components)): + for candidates in permutations(other.connected_components, len(components)): mappers = [] for component, candidate in zip(components, 
candidates): if searching_scope: @@ -206,23 +206,23 @@ def _cython_compiled_structure(self): if a.isotope: v3 = 1 << (a.isotope - a.mdl_isotope + 54) - if radicals[n]: + if a.is_radical: v3 |= 0x200000000000 else: v3 |= 0x100000000000 - elif radicals[n]: + elif a.is_radical: v3 = 0x8000200000000000 else: v3 = 0x8000100000000000 - v3 |= 1 << (charges[n] + 39) - v3 |= 1 << ((hydrogens[n] or 0) + 30) - v3 |= 1 << (neighbors(n) + 15) - v3 |= 1 << heteroatoms(n) + v3 |= 1 << (a.charge + 39) + v3 |= 1 << ((a.implicit_hydrogens or 0) + 30) + v3 |= 1 << (a.neighbors + 15) + v3 |= 1 << a.heteroatoms - if n in rings_sizes: + if a.ring_sizes: v4 = 0 - for r in rings_sizes[n]: + for r in a.ring_sizes: if r > 65: # big rings not supported continue v4 |= 1 << (65 - r) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 79f13cad..a6ce7721 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -28,7 +28,6 @@ def __init__(self, order: int): elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') self._order = order - self._in_ring = False self._stereo = None def __eq__(self, other): @@ -72,7 +71,6 @@ def copy(self, full=False, stereo=False) -> 'Bond': copy._stereo = self.stereo copy._in_ring = self.in_ring else: - copy._in_ring = False if stereo: copy._stereo = self.stereo else: diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index be079c02..9c47cf46 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -707,13 +707,17 @@ def fix_structure(self, recalculate_hydrogens=True): def calc_labels(self): atoms = self._atoms atoms_rings_sizes = self.atoms_rings_sizes # expensive: sssr based + atoms_rings = {n: set(r) for n, r in self.atoms_rings.items()} for n, m_bond in self._bonds.items(): neighbors = 0 heteroatoms = 0 hybridization = 1 explicit_hydrogens = 0 + ar = atoms_rings[n] for m, bond in m_bond.items(): + bond._in_ring = not 
ar.isdisjoint(atoms_rings[m]) # have common rings + order = bond.order if order == 8: continue From d983bb5bc3d3b4f021d1a8c5fea258397161d1eb Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 13:38:58 +0100 Subject: [PATCH 19/68] isomorphism fixed --- chython/algorithms/isomorphism.py | 4 ++-- chython/containers/molecule.py | 4 ++-- chython/periodictable/base/query.py | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index eb44cc71..4f8c1e74 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -22,7 +22,7 @@ from itertools import permutations from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product -from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement +from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement if TYPE_CHECKING: @@ -367,7 +367,7 @@ def _cython_compiled_query(self): else: v1 = 1 << (57 - n) v2 = 0 - if a.isotope: + if isinstance(a, QueryElement) and a.isotope: v3 = 1 << (a.isotope - a.mdl_isotope + 54) if a.is_radical: v3 |= 0x200000000000 diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 9c47cf46..fc2c7cb2 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -714,9 +714,9 @@ def calc_labels(self): heteroatoms = 0 hybridization = 1 explicit_hydrogens = 0 - ar = atoms_rings[n] + anr = atoms_rings.get(n) or False for m, bond in m_bond.items(): - bond._in_ring = not ar.isdisjoint(atoms_rings[m]) # have common rings + bond._in_ring = anr and (amr := atoms_rings.get(m) or False) and not anr.isdisjoint(amr) # have common rings order = bond.order if order == 8: diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 2089bc17..1d00a29b 100644 --- a/chython/periodictable/base/query.py +++ 
b/chython/periodictable/base/query.py @@ -268,7 +268,7 @@ def __eq__(self, other): return False if self.ring_sizes: if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): + if other.ring_sizes.isdisjoint(self.ring_sizes): return False elif other.ring_sizes: # not in ring expected return False @@ -342,7 +342,7 @@ def __eq__(self, other): return False if self.ring_sizes: if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): + if other.ring_sizes.isdisjoint(self.ring_sizes): return False elif other.ring_sizes: # not in ring expected return False @@ -407,6 +407,11 @@ def isotope(self, value: Optional[int]): raise TypeError('isotope must be an int') self._isotope = value + @property + @abstractmethod + def mdl_isotope(self) -> int: + ... + @classmethod def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: """ @@ -485,7 +490,7 @@ def __eq__(self, other): return False if self.ring_sizes: if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): + if other.ring_sizes.isdisjoint(self.ring_sizes): return False elif other.ring_sizes: # not in ring expected return False From acb4ad90da5f6d6447102f0aa8599838edb06e07 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:18:53 +0100 Subject: [PATCH 20/68] fixed depict --- chython/algorithms/depict.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 1189d32a..a48eb6c7 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -344,9 +344,6 @@ def __render_atoms(self: 'MoleculeContainer', uid): stroke_width_o = other_size * .1 stroke_width_m = mapping_size * .1 - # for cumulenes - cumulenes = {y for x in self._cumulenes(heteroatoms=True) if len(x) > 2 for y in x[1:-1]} - svg = [] maps = [] symbols = [] @@ -358,7 +355,8 @@ def __render_atoms(self: 'MoleculeContainer', uid): for n, atom in 
self._atoms.items(): x, y = atom.x, -atom.y symbol = atom.atomic_symbol - if not bonds[n] or symbol != 'C' or carbon or atom.charge or atom.is_radical or atom.isotope or n in cumulenes: + if (symbol != 'C' or atom.charge or atom.is_radical or atom.isotope or carbon + or not bonds[n] or sum(b == 2 for b in bonds[n].values()) == 2): if atom.charge: others.append(f' ' f'{_render_charge[atom.charge]}{"↑" if atom.is_radical else ""}') From d1a3909b6487dd69a13bffcbfbca607944fdadc4 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:23:44 +0100 Subject: [PATCH 21/68] new attrs of atoms. fixes. --- chython/algorithms/rings.py | 2 +- chython/algorithms/x3dom.py | 10 ++++++-- chython/files/xyz.py | 8 +++--- chython/periodictable/base/element.py | 14 +++++++++++ chython/periodictable/groupI.py | 4 +++ chython/periodictable/groupXIII.py | 8 ++++++ chython/periodictable/groupXIV.py | 24 ++++++++++++++++++ chython/periodictable/groupXV.py | 32 ++++++++++++++++++++++++ chython/periodictable/groupXVI.py | 32 ++++++++++++++++++++++++ chython/periodictable/groupXVII.py | 36 +++++++++++++++++++++++++++ 10 files changed, 162 insertions(+), 8 deletions(-) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index d2cecf1d..37cde6dc 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -51,7 +51,7 @@ def sssr(self) -> List[Tuple[int, ...]]: @cached_property def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: """ - Dict of atoms rings which contains it. + A dictionary with atom numbers as keys and a list of tuples (representing rings) as values. 
""" rings = defaultdict(list) for r in self.sssr: diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index f5da216d..2118899b 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # Copyright 2020 Dinar Batyrshin # This file is part of chython. # @@ -141,7 +141,13 @@ def depict3d(self: Union['MoleculeContainer', 'X3domMolecule'], index: int = 0) :param index: index of conformer """ - xyz = self._conformers[index] + if not hasattr(self, '_conformers'): + raise ValueError('No conformers stored within structure') + try: + xyz = self._conformers[index] + except IndexError: + raise IndexError('Invalid conformer index') + mx = sum(x for x, _, _ in xyz.values()) / len(xyz) my = sum(y for _, y, _ in xyz.values()) / len(xyz) mz = sum(z for _, _, z in xyz.values()) / len(xyz) diff --git a/chython/files/xyz.py b/chython/files/xyz.py index 612415bc..a77a8489 100644 --- a/chython/files/xyz.py +++ b/chython/files/xyz.py @@ -31,16 +31,13 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, mol = _cls() conformer = {} - mol._conformers.append(conformer) + mol._conformers = [conformer] atoms = mol._atoms bonds = mol._bonds for n, (a, x, y, z) in enumerate(matrix, 1): - atoms[n] = atom = Element.from_symbol(a)() + atoms[n] = Element.from_symbol(a)(x=x, y=y, implicit_hydrogens=0) bonds[n] = {} - atom.x = x - atom.y = y - atom._implicit_hydrogens = 0 conformer[n] = (x, y, z) if atom_charge is not None and None not in atom_charge: @@ -48,6 +45,7 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, atoms[n]._charge = c charge = sum(atom_charge) + mol.calc_labels() pb = possible_bonds(array(list(conformer.values())), array([a.atomic_radius for a in atoms.values()]), radius_multiplier) diff --git a/chython/periodictable/base/element.py 
b/chython/periodictable/base/element.py index 88ca210e..7818af9a 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -118,6 +118,20 @@ def mdl_isotope(self) -> int: MDL MOL common isotope """ + @property + def is_forming_single_bonds(self) -> bool: + """ + Atom can form stable covalent single bonds in molecules + """ + return False + + @property + def is_forming_double_bonds(self) -> bool: + """ + Atom can form stable covalent double bonds in molecules + """ + return False + @property def charge(self) -> int: """ diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index a0505f20..df3631f2 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -52,6 +52,10 @@ def atomic_radius(self): def mdl_isotope(self): return 1 + @property + def is_forming_single_bonds(self): + return True + class Li(Element, PeriodII, GroupI): __slots__ = () diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index ef5243a6..e89d745f 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -55,6 +55,14 @@ def atomic_radius(self): def mdl_isotope(self): return 11 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Al(Element, PeriodIII, GroupXIII): __slots__ = () diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index bd94ad60..43cca943 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -54,6 +54,14 @@ def atomic_radius(self): def mdl_isotope(self): return 12 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Si(Element, PeriodIII, GroupXIV): __slots__ = () @@ -86,6 +94,14 @@ def atomic_radius(self): def mdl_isotope(self): return 28 + @property + def is_forming_single_bonds(self): + return 
True + + @property + def is_forming_double_bonds(self): + return True + class Ge(Element, PeriodIV, GroupXIV): __slots__ = () @@ -118,6 +134,14 @@ def atomic_radius(self): def mdl_isotope(self): return 73 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Sn(Element, PeriodV, GroupXIV): __slots__ = () diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 700efe89..5f031016 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -55,6 +55,14 @@ def atomic_radius(self): def mdl_isotope(self): return 14 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class P(Element, PeriodIII, GroupXV): __slots__ = () @@ -94,6 +102,14 @@ def atomic_radius(self): def mdl_isotope(self): return 31 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class As(Element, PeriodIV, GroupXV): __slots__ = () @@ -126,6 +142,14 @@ def atomic_radius(self): def mdl_isotope(self): return 75 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Sb(Element, PeriodV, GroupXV): __slots__ = () @@ -159,6 +183,14 @@ def atomic_radius(self): def mdl_isotope(self): return 122 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Bi(Element, PeriodVI, GroupXV): __slots__ = () diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index 85f72a23..0c782531 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -55,6 +55,14 @@ def atomic_radius(self): def mdl_isotope(self): return 16 + @property + def is_forming_single_bonds(self): + return True + + @property + def 
is_forming_double_bonds(self): + return True + class S(Element, PeriodIII, GroupXVI): __slots__ = () @@ -235,6 +243,14 @@ def atomic_radius(self): def mdl_isotope(self): return 32 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Se(Element, PeriodIV, GroupXVI): __slots__ = () @@ -298,6 +314,14 @@ def atomic_radius(self): def mdl_isotope(self): return 79 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Te(Element, PeriodV, GroupXVI): __slots__ = () @@ -352,6 +376,14 @@ def atomic_radius(self): def mdl_isotope(self): return 128 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Po(Element, PeriodVI, GroupXVI): __slots__ = () diff --git a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index 3eecfc17..3be4f6a7 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -54,6 +54,10 @@ def atomic_radius(self): def mdl_isotope(self): return 19 + @property + def is_forming_single_bonds(self): + return True + class Cl(Element, PeriodIII, GroupXVII): __slots__ = () @@ -97,6 +101,14 @@ def atomic_radius(self): def mdl_isotope(self): return 35 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Br(Element, PeriodIV, GroupXVII): __slots__ = () @@ -147,6 +159,14 @@ def atomic_radius(self): def mdl_isotope(self): return 80 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class I(Element, PeriodV, GroupXVII): __slots__ = () @@ -219,6 +239,14 @@ def atomic_radius(self): def mdl_isotope(self): return 127 + @property + def is_forming_single_bonds(self): + return True + + @property + def 
is_forming_double_bonds(self): + return True + class At(Element, PeriodVI, GroupXVII): __slots__ = () @@ -252,6 +280,14 @@ def atomic_radius(self): def mdl_isotope(self): return 210 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Ts(Element, PeriodVII, GroupXVII): __slots__ = () From ebedc7f2b3d6b10a9d918bb43d4c2e899f3543eb Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:30:19 +0100 Subject: [PATCH 22/68] stereo refactored. simplified stereo in queries. now it's users problem to set it right. query isomorphism reduced to query to atom. --- chython/algorithms/isomorphism.py | 4 +- chython/algorithms/mapping/fixmapper.py | 4 +- chython/algorithms/smiles.py | 2 +- .../{stereo/molecule.py => stereo.py} | 864 +++++++++++++----- chython/algorithms/stereo/__init__.py | 23 - chython/algorithms/stereo/graph.py | 467 ---------- chython/containers/molecule.py | 4 +- chython/containers/query.py | 3 +- chython/files/daylight/smiles.py | 4 +- chython/files/libinchi/wrapper.py | 4 +- chython/periodictable/base/query.py | 189 ++-- chython/reactor/base.py | 26 +- chython/utils/rdkit.py | 2 +- 13 files changed, 722 insertions(+), 874 deletions(-) rename chython/algorithms/{stereo/molecule.py => stereo.py} (52%) delete mode 100644 chython/algorithms/stereo/__init__.py delete mode 100644 chython/algorithms/stereo/graph.py diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 4f8c1e74..8c0de0a5 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -271,9 +271,9 @@ class QueryIsomorphism(Isomorphism): def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None, _cython=True): """ - Get self to other Molecule or Query substructure mapping generator. + Get Query to Molecule substructure mapping generator. 
- :param other: Molecule or Query + :param other: Molecule :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. """ diff --git a/chython/algorithms/mapping/fixmapper.py b/chython/algorithms/mapping/fixmapper.py index 84768bdc..251eea95 100644 --- a/chython/algorithms/mapping/fixmapper.py +++ b/chython/algorithms/mapping/fixmapper.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -50,7 +50,7 @@ def fix_mapping(self: 'ReactionContainer', *, logging: bool = False) -> \ free_number = count(max(cgr) + 1) components = [(cgr.substructure(c), cgr.augmented_substructure(c, 2), # deep DEPENDS on rules! - set(c)) + c) for c in cgr.substructure(cgr.center_atoms).connected_components] r_atoms = ChainMap(*(x._atoms for x in self.reactants)) diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index b400a259..bbd43dfa 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -402,7 +402,7 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): # allene if n in self._stereo_allenes_terminals: t1, t2 = self._stereo_allenes_terminals[n] - env = self._stereo_allenes[n] + env = self.stereogenic_allenes[n] n1 = next(x for x in adjacency[t1] if x in env) n2 = next(x for x in adjacency[t2] if x in env) smi[3] = '@' if self._translate_allene_sign(n, n1, n2) else '@@' diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo.py similarity index 52% rename from chython/algorithms/stereo/molecule.py rename to chython/algorithms/stereo.py index 9415d551..7421d3f5 100644 --- a/chython/algorithms/stereo/molecule.py +++ b/chython/algorithms/stereo.py @@ -20,10 +20,9 @@ from functools import cached_property from itertools import combinations, product from logging import 
getLogger, INFO -from typing import Dict, Set, Tuple, Union, TYPE_CHECKING -from .graph import Stereo -from ..morgan import _morgan -from ...exceptions import AtomNotFound, IsChiral, NotChiral +from typing import Dict, Set, Tuple, Union, List, Optional, TYPE_CHECKING +from .morgan import _morgan +from ..exceptions import AtomNotFound, IsChiral, NotChiral logger = getLogger('chython.stereo') @@ -34,6 +33,30 @@ from chython import MoleculeContainer +# 1 2 +# \ | +# \| +# n---3 +# / +# / +# 0 +_tetrahedron_translate = {(0, 1, 2): False, (1, 2, 0): False, (2, 0, 1): False, + (0, 2, 1): True, (1, 0, 2): True, (2, 1, 0): True, + (0, 3, 1): False, (3, 1, 0): False, (1, 0, 3): False, + (0, 1, 3): True, (1, 3, 0): True, (3, 0, 1): True, + (0, 2, 3): False, (2, 3, 0): False, (3, 0, 2): False, + (0, 3, 2): True, (3, 2, 0): True, (2, 0, 3): True, + (1, 3, 2): False, (3, 2, 1): False, (2, 1, 3): False, + (1, 2, 3): True, (2, 3, 1): True, (3, 1, 2): True} +# 2 1 +# \ / +# n---m +# / \ +# 0 3 +_alkene_translate = {(0, 1): False, (1, 0): False, (0, 3): True, (3, 0): True, + (2, 3): False, (3, 2): False, (2, 1): True, (1, 2): True} + + def _pyramid_sign(n, u, v, w): # # | n / @@ -121,9 +144,245 @@ def _allene_sign(mark, u, v, w): return 0 -class MoleculeStereo(Stereo): +class MoleculeStereo: __slots__ = () + def clean_stereo(self: 'MoleculeContainer'): + """ + Remove stereo data. + """ + for a in self._atoms.values(): + a._stereo = None + for _, bs in self._bonds: + for b in bs.values(): + b._stereo = None # flush twice, but it should be still faster + self.flush_cache(keep_sssr=True, keep_components=True) + + @cached_property + def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: + """ + Carbon sp3 atom numbers. 
+ """ + tetra = [] + for n, atom in self._atoms.items(): + if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: + env = self._bonds[n] + if all(b == 1 for b in env.values()): + if sum(int(b) for b in env.values()) > 4: + continue + tetra.append(n) + return tuple(tetra) + + @cached_property + def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: + """ + All double-bonds chains (e.g. alkenes, allenes, cumulenes). + """ + atoms = self._atoms + bonds = self._bonds + + adj = defaultdict(set) # double bonds adjacency matrix + for n, atom in atoms.items(): + if atom.is_forming_double_bonds: + adj_n = adj[n].add + for m, bond in bonds[n].items(): + if bond == 2 and atoms[m].is_forming_double_bonds: + adj_n(m) + if not adj: + return [] + + terminals = [x for x, y in adj.items() if len(y) == 1] # list to keep atoms order! + cumulenes = [] + while terminals: + n = terminals.pop() + m = adj[n].pop() + path = [n, m] + while m not in terminals: + if len(bonds[m]) > 2: # not cumulene. SO3, SO4- etc. + cumulenes.extend(zip(path, path[1:])) # keep single double bonds instead of cumulene chain. + break + adj_m = adj[m] + adj_m.discard(n) + n, m = m, adj_m.pop() + path.append(m) + else: + terminals.remove(m) + adj[m].pop() + cumulenes.append(tuple(path)) + return cumulenes + + @cached_property + def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, int]]]: + """ + Tetrahedrons which contains at least 3 non-hydrogen neighbors and corresponding neighbors order. + """ + # 2 + # | + # 1--K--3 + # | + # 4? 
+ atoms = self._atoms + bonds = self._bonds + tetrahedrons = {} + for n in self.tetrahedrons: + if any(not atoms[x].is_forming_single_bonds for x in bonds[n]): + continue # skip metal-carbon complexes + env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) + if len(env) in (3, 4): + tetrahedrons[n] = env + return tetrahedrons + + @cached_property + def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: + """ + Cumulenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. + """ + # 5 4 + # \ / + # 2---3 + # / \ + # 1 6 + bonds = self._bonds + atoms = self._atoms + cumulenes = {} + for path in self.cumulenes: + nf = bonds[path[0]] + nl = bonds[path[-1]] + n1, m1 = path[1], path[-2] + if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 + for m, b in nf.items() if m != n1): + continue # skip X=C=C structures and metal-carbon complexes + if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 + for m, b in nl.items() if m != m1): + continue # skip X=C=C structures and metal-carbon complexes + nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b != 8] + mn = [x for x, b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b != 8] + if nn and mn: + sn = nn[1] if len(nn) == 2 else None + sm = mn[1] if len(mn) == 2 else None + cumulenes[path] = (nn[0], mn[0], sn, sm) + return cumulenes + + @cached_property + def stereogenic_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: + """ + Allenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. 
+ """ + return {path[len(path) // 2]: env for path, env in self.stereogenic_cumulenes.items() if len(path) % 2} + + @cached_property + def stereogenic_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: + """ + Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. + """ + stereo = {} + for path, env in self.stereogenic_cumulenes.items(): + if len(path) % 2: + continue + stereo[(path[0], path[-1])] = env + return stereo + + @cached_property + def ring_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: + """ + Tetrahedrons in rings, except ring-linkers. Values are non-ring atoms. + """ + out = {} + atoms_rings = self.atoms_rings + tetrahedrons = self.stereogenic_tetrahedrons + points = self.rings_linker_tetrahedrons + environment = self.not_special_connectivity + for n, r in atoms_rings.items(): + if n in tetrahedrons and n not in points: + out[n] = tuple(environment[n].difference(atoms_rings)) + return out + + @cached_property + def rings_linker_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: + """ + A dictionary where the keys are tetrahedron atoms shared between two rings (not condensed rings) and the values + are tuples representing their neighbors in the first and second rings respectively. + """ + out = {} + tetrahedrons = self.stereogenic_tetrahedrons + for n, r in self.atoms_rings.items(): + if n in tetrahedrons: + for nr, mr in combinations(r, 2): + if len(set(nr).intersection(mr)) == 1: + ni = nr.index(n) + mi = mr.index(n) + out[n] = (nr[ni - 1], nr[ni - len(nr) + 1], mr[mi - 1], mr[mi - len(mr) + 1]) + break + return out + + @cached_property + def ring_cumulenes_terminals(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: + """ + Terminal atoms of inside ring cumulenes. 
+ """ + out = set() + ar = self.atoms_rings + for n, *_, m in self.stereogenic_cumulenes: + if n in ar and m in ar and not set(ar[n]).isdisjoint(ar[m]): + out.add((n, m)) + return out + + @cached_property + def rings_linker_cumulenes_terminals(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: + """ + Terminal atoms of cumulenes connecting two rings. Values are neighbors in first and second rings. + """ + out = {} + ar = self.atoms_rings + chord = self.ring_cumulenes_terminals + for (n, *_, m), (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if n in ar and m in ar and (n, m) not in chord: + out[(n, m)] = (n1, n2, m1, m2) + return out + + @cached_property + def ring_attached_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Union[Tuple[int, int], Tuple[int]]]: + """ + Cumulenes attached to rings from one side. Values are out of ring neighbor atoms. + """ + ar = self.atoms_rings + out = {} + for (n, *_, m), (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if n in ar: + if m in ar: + continue + if m2: + out[(n, m)] = (m1, m2) + else: + out[(n, m)] = (m1,) + elif m in ar: + if n2: + out[(n, m)] = (n1, n2) + else: + out[(n, m)] = (n1,) + return out + + @property + def chiral_tetrahedrons(self) -> Set[int]: + """ + Chiral tetrahedrons except already labeled ones. + """ + return self.__chiral_centers[0] + + @property + def chiral_cis_trans(self) -> Set[Tuple[int, int]]: + """ + Chiral cis-trans bonds except already labeled ones. + """ + return self.__chiral_centers[1] + + @property + def chiral_allenes(self) -> Set[int]: + """ + Chiral allenes except already labeled ones. + """ + return self.__chiral_centers[2] + def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cache=True): """ Add stereo data by wedge notation of bonds. Use it for tetrahedrons of allenes. 
@@ -132,73 +391,78 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac :param m: number of atom to which wedge bond coming :param mark: up bond is 1, down is -1 """ - if n not in self._atoms: + atoms = self._atoms + if n not in atoms or m not in atoms or m not in self._bonds[n]: raise AtomNotFound - if n in self._atoms_stereo: + elif atoms[n].stereo is not None: raise IsChiral + elif c := self._stereo_allenes_centers.get(n): + # allenes + if atoms[c].stereo is not None: + raise IsChiral + elif c not in self.chiral_allenes: + raise NotChiral - plane = self._plane - if n in self._chiral_tetrahedrons: - if m not in self._bonds[n]: - raise AtomNotFound - th = self._stereo_tetrahedrons[n] - if self._atoms[m].atomic_number == 1: - s = _pyramid_sign((*plane[m], mark), *((*plane[x], 0) for x in th)) + t1, t2 = self._stereo_allenes_terminals[c] + order = self.stereogenic_allenes[c] + if atoms[m].atomic_number == 1: + if t1 == n: + m1 = order[1] + else: + t1, t2 = t2, t1 + m1 = order[0] + r = True else: - order = [(*plane[x], mark if x == m else 0) for x in th] + w = order.index(m) + if w == 0: + m1 = order[1] + r = False + elif w == 1: + m1 = order[0] + t1, t2 = t2, t1 + r = False + elif w == 2: + m1 = order[1] + r = True + else: + m1 = order[0] + t1, t2 = t2, t1 + r = True + if s := _allene_sign(mark, atoms[t1].xy, atoms[t2].xy, atoms[m1].xy): + atoms[c]._stereo = s < 0 if r else s > 0 + if clean_cache: + self.flush_cache(keep_sssr=True, keep_components=True) + # tetrahedrons + elif n in self.chiral_tetrahedrons: + th = self.stereogenic_tetrahedrons[n] + am = atoms[m] + if am.atomic_number == 1: + order = [] + for x in th: + ax = atoms[x] + order.append((ax.x, ax.y, 0)) + s = _pyramid_sign((am.x, am.y, mark), *order) + else: + order = [] + for x in th: + ax = atoms[x] + order.append((ax.x, ax.y, mark if x == m else 0)) if len(order) == 3: if len(self._bonds[n]) == 4: # explicit hydrogen x = next(x for x in self._bonds[n] if x not in th) - s 
= _pyramid_sign((*plane[x], 0), *order) + ax = atoms[x] + s = _pyramid_sign((ax.x, ax.y, 0), *order) else: - s = _pyramid_sign((*plane[n], 0), *order) + an = atoms[n] + s = _pyramid_sign((an.x, an.y, 0), *order) else: s = _pyramid_sign(order[-1], *order[:3]) if s: - self._atoms_stereo[n] = s > 0 + atoms[n]._stereo = s > 0 if clean_cache: - self.flush_cache() + self.flush_cache(keep_components=True, keep_sssr=True) else: - c = self._stereo_allenes_centers.get(n) - if c: - if c in self._allenes_stereo: - raise IsChiral - elif c not in self._chiral_allenes: - raise NotChiral - - t1, t2 = self._stereo_allenes_terminals[c] - order = self._stereo_allenes[c] - if self._atoms[m].atomic_number == 1: - if t1 == n: - m1 = order[1] - else: - t1, t2 = t2, t1 - m1 = order[0] - r = True - else: - w = order.index(m) - if w == 0: - m1 = order[1] - r = False - elif w == 1: - m1 = order[0] - t1, t2 = t2, t1 - r = False - elif w == 2: - m1 = order[1] - r = True - else: - m1 = order[0] - t1, t2 = t2, t1 - r = True - s = _allene_sign(mark, plane[t1], plane[t2], plane[m1]) - if s: - self._allenes_stereo[c] = s < 0 if r else s > 0 - if clean_cache: - self.flush_cache() - else: - # only tetrahedrons and allenes supported - raise NotChiral + raise NotChiral def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): """ @@ -206,11 +470,11 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): """ atoms = self._atoms flag = False - while self._chiral_cis_trans: + while self.chiral_cis_trans: stereo = False - for nm in self._chiral_cis_trans: + for nm in self.chiral_cis_trans: n, m = nm - n1, m1, *_ = self._stereo_cis_trans[nm] + n1, m1, *_ = self.stereogenic_cis_trans[nm] s = _cis_trans_sign(atoms[n1].xy, atoms[n].xy, atoms[m].xy, atoms[m1].xy) if s: stereo = True @@ -222,7 +486,7 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): else: break if flag and clean_cache: - self.flush_cache() + 
self.flush_cache(keep_components=True, keep_sssr=True) def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mark: bool, *, clean_cache=True): """ @@ -243,14 +507,14 @@ def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mar if not isinstance(mark, bool): raise TypeError('stereo mark should be bool') - if n in self._chiral_tetrahedrons: + if n in self.chiral_tetrahedrons: atom._stereo = self._translate_tetrahedron_sign(n, env, mark) if clean_cache: - self.flush_cache() - elif n in self._chiral_allenes: + self.flush_cache(keep_components=True, keep_sssr=True) + elif n in self.chiral_allenes: atom._stereo = self._translate_allene_sign(n, *env, mark) if clean_cache: - self.flush_cache() + self.flush_cache(keep_components=True, keep_sssr=True) else: # only tetrahedrons supported raise NotChiral @@ -281,14 +545,14 @@ def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: if self._bonds[i][j].stereo is not None: raise IsChiral - if (n, m) in self._chiral_cis_trans: - self._bonds[i][j] = self._translate_cis_trans_sign(n, m, n1, n2, mark) + if (n, m) in self.chiral_cis_trans: + self._bonds[i][j]._stereo = self._translate_cis_trans_sign(n, m, n1, n2, mark) if clean_cache: - self.flush_cache() - elif (m, n) in self._chiral_cis_trans: - self._bonds[i][j] = self._translate_cis_trans_sign(m, n, n2, n1, mark) + self.flush_cache(keep_components=True, keep_sssr=True) + elif (m, n) in self.chiral_cis_trans: + self._bonds[i][j]._stereo = self._translate_cis_trans_sign(m, n, n2, n1, mark) if clean_cache: - self.flush_cache() + self.flush_cache(keep_components=True, keep_sssr=True) else: raise NotChiral @@ -303,55 +567,58 @@ def fix_stereo(self: 'MoleculeContainer'): """ Reset stereo marks. 
""" - if self._atoms_stereo: # filter tetrahedrons - stereo_tetrahedrons = self._stereo_tetrahedrons - atoms_stereo = {k: v for k, v in self._atoms_stereo.items() if k in stereo_tetrahedrons} - self._atoms_stereo = self_atoms_stereo = {} - else: - atoms_stereo = {} - - if self._allenes_stereo: # filter allenes - stereo_allenes = self._stereo_allenes - allenes_stereo = {k: v for k, v in self._allenes_stereo.items() if k in stereo_allenes} - self._allenes_stereo = self_allenes_stereo = {} - else: - allenes_stereo = {} - - if self._cis_trans_stereo: # filter cis-trans - stereo_cis_trans = self._stereo_cis_trans - cis_trans_stereo = {k: v for k, v in self._cis_trans_stereo.items() if k in stereo_cis_trans} - self._cis_trans_stereo = self_stereo_cis_trans = {} - else: - cis_trans_stereo = {} + stereo_tetrahedrons = self.stereogenic_tetrahedrons + stereo_allenes = self.stereogenic_allenes + stereo_cis_trans = self._stereo_cis_trans_terminals + atoms_stereo = [] + allenes_stereo = [] + cis_trans_stereo = [] + for n, a in self._atoms.items(): + if a.stereo is None: + continue + elif n in stereo_tetrahedrons: + atoms_stereo.append((n, a, a.stereo)) + elif n in stereo_allenes: + allenes_stereo.append((n, a, a.stereo)) + a._stereo = None # flush stereo label + + for n, m, b in self.bonds(): + if b.stereo is None: + continue + elif ta := stereo_cis_trans.get(n): + cis_trans_stereo.append((ta, b, b.stereo)) + b._stereo = None # flush stereo label + self.flush_stereo_cache() old_stereo = len(atoms_stereo) + len(allenes_stereo) + len(cis_trans_stereo) while old_stereo: - chiral_tetrahedrons = self._chiral_tetrahedrons - chiral_allenes = self._chiral_allenes - chiral_cis_trans = self._chiral_cis_trans + chiral_tetrahedrons = self.chiral_tetrahedrons + chiral_allenes = self.chiral_allenes + chiral_cis_trans = self.chiral_cis_trans - tmp = {} - for n, s in atoms_stereo.items(): + # filter out resolved + tmp = [] + for n, a, s in atoms_stereo: if n in chiral_tetrahedrons: - 
self_atoms_stereo[n] = s + a._stereo = s # restore stereo else: - tmp[n] = s + tmp.append((n, a, s)) atoms_stereo = tmp - tmp = {} - for n, s in allenes_stereo.items(): + tmp = [] + for n, a, s in allenes_stereo: if n in chiral_allenes: - self_allenes_stereo[n] = s + a._stereo = s # restore stereo else: - tmp[n] = s + tmp.append((n, a, s)) allenes_stereo = tmp - tmp = {} - for n, s in cis_trans_stereo.items(): - if n in chiral_cis_trans: - self_stereo_cis_trans[n] = s + tmp = [] + for ta, b, s in cis_trans_stereo: + if ta in chiral_cis_trans: + b._stereo = s else: - tmp[n] = s + tmp.append((ta, b, s)) cis_trans_stereo = tmp fail_stereo = len(atoms_stereo) + len(allenes_stereo) + len(cis_trans_stereo) @@ -360,26 +627,236 @@ def fix_stereo(self: 'MoleculeContainer'): old_stereo = fail_stereo self.flush_stereo_cache() + @cached_property + def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. + """ + terminals = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = (path[i - 1], path[i]) + return terminals + + @cached_property + def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal and central atoms to terminal pair mapping. 
+ """ + terminals = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) + return terminals + + @cached_property + def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: + """ + Cis-Trans terminal atoms counterparts + """ + counterpart = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + counterpart[n] = m + counterpart[m] = n + return counterpart + + @cached_property + def _stereo_allenes_centers(self) -> Dict[int, int]: + """ + Allene terminal atom to center mapping + """ + terminals = {} + for c, (n, m) in self._stereo_allenes_terminals.items(): + terminals[n] = terminals[m] = c + return terminals + + @cached_property + def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Allene center atom to terminals mapping + """ + return {path[len(path) // 2]: (path[0], path[-1]) for path in self.stereogenic_cumulenes if len(path) % 2} + + def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): + """ + Get sign of chiral tetrahedron atom for specified neighbors order + + :param n: stereo atom + :param env: neighbors order + :param s: if None, use existing sign else translate given to molecule + """ + if s is None: + s = self._atoms[n].stereo + if s is None: + raise KeyError + + order = self.stereogenic_tetrahedrons[n] + if len(order) == 3: + if len(env) == 4: # hydrogen atom passed to env + # hydrogen always last in order + try: + order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme + except StopIteration: + raise KeyError + elif len(env) != 3: # pyramid or tetrahedron expected + raise ValueError('invalid atoms list') + elif len(env) not in (3, 4): # pyramid or tetrahedron expected + raise ValueError('invalid atoms list') + + translate = tuple(order.index(x) for x in env[:3]) + 
if _tetrahedron_translate[translate]: + return not s + return s + + def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): + """ + Get sign for specified opposite neighbors + + :param n: first double bonded atom + :param m: last double bonded atom + :param nn: neighbor of first atom + :param nm: neighbor of last atom + :param s: if None, use existing sign else translate given to molecule + """ + try: + n0, n1, n2, n3 = self.stereogenic_cis_trans[(n, m)] + except KeyError: + n0, n1, n2, n3 = self.stereogenic_cis_trans[(m, n)] + n, m = m, n # in alkenes sign not order depended + nn, nm = nm, nn + + if s is None: + i, j = self._stereo_cis_trans_centers[n] + s = self._bonds[i][j].stereo + if s is None: + raise KeyError + + if nn == n0: # same start + t0 = 0 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n1: + t0 = 1 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + t0 = 2 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + t0 = 3 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + else: + raise KeyError + + if _alkene_translate[(t0, t1)]: + return not s + return s + + def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): + """ + get sign for specified opposite neighbors + + :param c: central double bonded atom + :param nn: neighbor of first double bonded atom + :param nm: neighbor of last double bonded atom + :param s: if None, use existing sign else translate given to molecule + """ + if s is None: + s = self._atoms[c].stereo + if s is None: + raise KeyError + + 
n0, n1, n2, n3 = self.stereogenic_allenes[c] + if nn == n0: # same start + t0 = 0 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n1: + t0 = 1 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + t0 = 2 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + t0 = 3 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + else: + raise KeyError + + if _alkene_translate[(t0, t1)]: + return not s + return s + @cached_property def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): - atoms_stereo = self._atoms_stereo - allenes_centers = self._stereo_allenes_centers atoms = self._atoms + overlap = set() space = [] solved = [] seen = set() - for n, s in self._allenes_stereo.items(): - env = self._stereo_allenes[n] + for n, env in self.stereogenic_allenes.items(): + if atoms[n].stereo is None: + continue term = self._stereo_allenes_terminals[n] + overlap.update(term) # don't allow incoming wedge to allenes terminals orders = [(*env[:2], *term, n, True), (*env[1::-1], *term[::-1], n, True)] if env[2]: orders.append((env[2], env[1], *term, n, True)) if env[3]: orders.append((env[3], env[0], *term[::-1], n, True)) space.append(orders) - for n, s in atoms_stereo.items(): - order = list(self._stereo_tetrahedrons[n]) + for n, env in self.stereogenic_tetrahedrons.items(): + if atoms[n].stereo is None: + continue + overlap.add(n) # don't allow incoming wedge to stereo tetrahedrons + order = list(env) orders = [(*order, n, False)] for _ in range(1, len(order)): order = order.copy() @@ -394,20 +871,22 @@ def 
_wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): good = [] if orders[0][-1]: for x in orders: - if (x0 := x[0]) in seen or x0 not in atoms_stereo and x0 not in allenes_centers: + x0 = x[0] + if x0 in seen or x0 not in overlap: good.append(x) seen.add(x[2]) if good: - solved.append(max(good, key=lambda x: (atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) + solved.append(max(good, key=lambda x: (not atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) else: unsolved.append(orders) else: for x in orders: - if (x0 := x[0]) in seen or x0 not in atoms_stereo and x0 not in allenes_centers: + x0 = x[0] + if x0 in seen or x0 not in overlap: good.append(x) if good: seen.add(x[-2]) - solved.append(max(good, key=lambda x: (atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) + solved.append(max(good, key=lambda x: (not atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) else: unsolved.append(orders) space = unsolved @@ -441,7 +920,7 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): def __wedge_sign(self: 'MoleculeContainer', order): if order[-1]: # allene - s = self._translate_allene_sign(order[-2], *order[:2]) + s = self._translate_allene_sign(order[-2], order[0], order[1]) v = _allene_sign(1, self._atoms[order[2]].xy, self._atoms[order[3]].xy, self._atoms[order[1]].xy) if not v: logger.info(f'need 2d clean. 
allenes wedge stereo ambiguous for atom {order[-2]}') @@ -453,16 +932,21 @@ def __wedge_sign(self: 'MoleculeContainer', order): n = order[-2] s = self._translate_tetrahedron_sign(n, order[:-2]) # need recalculation if XY changed + ao0 = self._atoms[order[0]] + ao1 = self._atoms[order[1]] + ao2 = self._atoms[order[2]] if len(order) == 5: - v = _pyramid_sign((*self._atoms[n].xy, 0), - (*self._atoms[order[0]].xy, 1), - (*self._atoms[order[1]].xy, 0), - (*self._atoms[order[2]].xy, 0)) + an = self._atoms[n] + v = _pyramid_sign((an.x, an.y, 0), + (ao0.x, ao0.y, 1), + (ao1.x, ao1.y, 0), + (ao2.x, ao2.y, 0)) else: - v = _pyramid_sign((*self._atoms[order[3]].xy, 0), - (*self._atoms[order[0]].xy, 1), - (*self._atoms[order[1]].xy, 0), - (*self._atoms[order[2]].xy, 0)) + ao3 = self._atoms[order[3]] + v = _pyramid_sign((ao3.x, ao3.y, 0), + (ao0.x, ao0.y, 1), + (ao1.x, ao1.y, 0), + (ao2.x, ao2.y, 0)) if not v: logger.info(f'need 2d clean. tetrahedron wedge stereo ambiguous for atom {n}') if s: @@ -470,18 +954,6 @@ def __wedge_sign(self: 'MoleculeContainer', order): else: return n, order[0], -v - @property - def _chiral_tetrahedrons(self) -> Set[int]: - return self.__chiral_centers[0] - - @property - def _chiral_cis_trans(self) -> Set[Tuple[int, int]]: - return self.__chiral_centers[1] - - @property - def _chiral_allenes(self) -> Set[int]: - return self.__chiral_centers[2] - @cached_property def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} @@ -516,99 +988,11 @@ def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[i morgan = _morgan(morgan, self.int_adjacency) return morgan - @cached_property - def _rings_tetrahedrons_linkers(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: - """ - Ring-linkers tetrahedrons. - - Values are neighbors in first and second rings. 
- """ - out = {} - tetrahedrons = self._stereo_tetrahedrons - for n, r in self.atoms_rings.items(): - if n in tetrahedrons: - for nr, mr in combinations(r, 2): - if len(set(nr).intersection(mr)) == 1: - ni = nr.index(n) - mi = mr.index(n) - out[n] = (nr[ni - 1], nr[ni - len(nr) + 1], mr[mi - 1], mr[mi - len(mr) + 1]) - break - return out - - @cached_property - def _rings_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: - """ - Tetrahedrons in rings, except ring-linkers. - - Values are out of ring atoms. - """ - out = {} - atoms_rings = self.atoms_rings - tetrahedrons = self._stereo_tetrahedrons - points = self._rings_tetrahedrons_linkers - environment = self.not_special_connectivity - for n, r in atoms_rings.items(): - if n in tetrahedrons and n not in points: - out[n] = tuple(environment[n].difference(atoms_rings)) - return out - - @cached_property - def _rings_cumulenes_linkers(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: - """ - Ring-linkers cumulenes except chords. - - Values are neighbors in first and second rings. - """ - out = {} - ar = self.atoms_rings - chord = self._rings_cumulenes - for (n, *_, m), (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if n in ar and m in ar and (n, m) not in chord: - out[(n, m)] = (n1, n2, m1, m2) - return out - - @cached_property - def _rings_cumulenes(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: - """ - Cumulenes in rings always chiral. - """ - out = set() - ar = self.atoms_rings - for n, *_, m in self._stereo_cumulenes: - if n in ar and m in ar and not set(ar[n]).isdisjoint(ar[m]): - out.add((n, m)) - return out - - @cached_property - def _rings_cumulenes_attached(self: 'MoleculeContainer') -> Dict[Tuple[int, int], - Union[Tuple[int, int], Tuple[int]]]: - """ - Cumulenes attached to rings. - - Values are out of ring atoms. 
- """ - ar = self.atoms_rings - out = {} - for (n, *_, m), (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if n in ar: - if m in ar: - continue - if m2: - out[(n, m)] = (m1, m2) - else: - out[(n, m)] = (m1,) - elif m in ar: - if n2: - out[(n, m)] = (n1, n2) - else: - out[(n, m)] = (n1,) - return out - @cached_property def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): atoms_rings = self.atoms_rings - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans allenes_centers = self._stereo_allenes_centers cis_trans_terminals = self._stereo_cis_trans_terminals cis_trans_centers = self._stereo_cis_trans_centers @@ -618,7 +1002,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): # tetrahedron is chiral if all its neighbors are unique. chiral_t = {n for n, env in tetrahedrons.items() if len({morgan[x] for x in env}) == len(env)} # tetrahedrons-linkers is chiral if in each rings neighbors are unique. - chiral_t.update(n for n, (n1, n2, m1, m2) in self._rings_tetrahedrons_linkers.items() + chiral_t.update(n for n, (n1, n2, m1, m2) in self.rings_linker_tetrahedrons.items() if morgan[n1] != morgan[n2] and morgan[m1] != morgan[m2]) # required for axes detection. @@ -630,7 +1014,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): # ring-linkers and rings-attached also takes into account. chiral_c = set() chiral_a = set() - for path, (n1, m1, n2, m2) in self._stereo_cumulenes.items(): + for path, (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): n, m = path[0], path[-1] if len(path) % 2: @@ -640,7 +1024,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): stereogenic.add(n) stereogenic.add(m) # ring cumulenes always chiral. can be already added. 
- for nm in self._rings_cumulenes: + for nm in self.ring_cumulenes_terminals: n, m = nm if any(len(x) < 8 for x in atoms_rings[n]): # skip small rings. if n in chiral_c: # remove already added small rings cumulenes. @@ -660,22 +1044,22 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): # find chiral axes. build graph of stereogenic atoms in rings. # atoms connected then located in same ring or cumulene. - for n, env in self._rings_tetrahedrons.items(): + for n, env in self.ring_tetrahedrons.items(): if len(env) == 2: # one or zero non-ring neighbors stereogenic. n1, n2 = env if morgan[n1] == morgan[n2]: # only unique non-ring members required. continue graph[n] = set() stereogenic.add(n) # non-linker tetrahedrons in rings - stereogenic. - for n, (n1, n2, m1, m2) in self._rings_tetrahedrons_linkers.items(): + for n, (n1, n2, m1, m2) in self.rings_linker_tetrahedrons.items(): graph[n] = set() if morgan[n1] != morgan[n2] or morgan[m1] != morgan[m2]: stereogenic.add(n) # linkers with at least one unsymmetric ring. - for n, m in self._rings_cumulenes_linkers: + for n, m in self.rings_linker_cumulenes_terminals: graph[n] = {m} graph[m] = {n} # stereogenic atoms already found. - for (n, m), env in self._rings_cumulenes_attached.items(): + for (n, m), env in self.ring_attached_cumulenes.items(): if len(env) == 2: n1, n2 = env if morgan[n1] == morgan[n2]: # only unique non-ring members required. 
@@ -729,9 +1113,9 @@ def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan atoms_stereo, cis_trans_stereo, allenes_stereo): bonds = self.int_adjacency - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes = self._stereo_allenes + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans + allenes = self.stereogenic_allenes translate_tetrahedron = self._translate_tetrahedron_sign translate_cis_trans = self._translate_cis_trans_sign diff --git a/chython/algorithms/stereo/__init__.py b/chython/algorithms/stereo/__init__.py deleted file mode 100644 index 18f784a7..00000000 --- a/chython/algorithms/stereo/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2021 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from .graph import * -from .molecule import * - - -__all__ = ['MoleculeStereo', 'Stereo'] diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py deleted file mode 100644 index 59523deb..00000000 --- a/chython/algorithms/stereo/graph.py +++ /dev/null @@ -1,467 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019-2024 Ramil Nugmanov -# This file is part of chython. 
-# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from collections import defaultdict -from functools import cached_property -from typing import Dict, Optional, Tuple, TYPE_CHECKING, Union - - -if TYPE_CHECKING: - from chython import MoleculeContainer, QueryContainer - Container = Union[MoleculeContainer, QueryContainer] - - -_heteroatoms = {5, 6, 7, 8, 14, 15, 16, 17, 33, 34, 35, 52, 53} - -# 1 2 -# \ | -# \| -# n---3 -# / -# / -# 0 -_tetrahedron_translate = {(0, 1, 2): False, (1, 2, 0): False, (2, 0, 1): False, - (0, 2, 1): True, (1, 0, 2): True, (2, 1, 0): True, - (0, 3, 1): False, (3, 1, 0): False, (1, 0, 3): False, - (0, 1, 3): True, (1, 3, 0): True, (3, 0, 1): True, - (0, 2, 3): False, (2, 3, 0): False, (3, 0, 2): False, - (0, 3, 2): True, (3, 2, 0): True, (2, 0, 3): True, - (1, 3, 2): False, (3, 2, 1): False, (2, 1, 3): False, - (1, 2, 3): True, (2, 3, 1): True, (3, 1, 2): True} -# 2 1 -# \ / -# n---m -# / \ -# 0 3 -_alkene_translate = {(0, 1): False, (1, 0): False, (0, 3): True, (3, 0): True, - (2, 3): False, (3, 2): False, (2, 1): True, (1, 2): True} - -# allowed atoms. these atoms have stable covalent bonds. -_organic_subset = {1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 52, 53, 85} - - -class Stereo: - __slots__ = () - - @cached_property - def cumulenes(self) -> Tuple[Tuple[int, ...], ...]: - """ - Alkenes, allenes and cumulenes atoms numbers. 
- """ - return tuple(self._cumulenes()) - - @cached_property - def tetrahedrons(self: 'Container') -> Tuple[int, ...]: - """ - Carbon sp3 atoms numbers. - """ - tetra = [] - for n, atom in self._atoms.items(): - if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: - env = self._bonds[n] - if all(int(x) == 1 for x in env.values()): - if sum(int(x) for x in env.values()) > 4: - continue - tetra.append(n) - return tuple(tetra) - - def clean_stereo(self: 'Container'): - """ - Remove stereo data. - """ - for a in self._atoms.values(): - a._stereo = None - for _, bs in self._bonds: - for b in bs.values(): - b._stereo = None # flush twice, but it should be still faster - self.flush_cache() - - def get_mapping(self: 'Container', other: 'Container', **kwargs): - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo - cis_trans_stereo = self._cis_trans_stereo - if atoms_stereo or allenes_stereo or cis_trans_stereo: - other_atoms_stereo = other._atoms_stereo - other_allenes_stereo = other._allenes_stereo - other_cis_trans_stereo = other._cis_trans_stereo - other_translate_tetrahedron_sign = other._translate_tetrahedron_sign - other_translate_allene_sign = other._translate_allene_sign - other_translate_cis_trans_sign = other._translate_cis_trans_sign - - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes = self._stereo_allenes - - for mapping in super().get_mapping(other, **kwargs): - for n, s in atoms_stereo.items(): - m = mapping[n] - if m not in other_atoms_stereo: # self stereo atom not stereo in other - break - # translate stereo mark in other in order of self tetrahedron - if other_translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) != s: - break - else: - for n, s in allenes_stereo.items(): - m = mapping[n] - if m not in other_allenes_stereo: # self stereo allene not stereo in other - break - # translate stereo mark in other in order of self allene - nn, nm, *_ = allenes[n] - if 
other_translate_allene_sign(m, mapping[nn], mapping[nm]) != s: - break - else: - for nm, s in cis_trans_stereo.items(): - n, m = nm - on, om = mapping[n], mapping[m] - if (on, om) not in other_cis_trans_stereo: - if (om, on) not in other_cis_trans_stereo: - break # self stereo cis_trans not stereo in other - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(om, on, mapping[nm], mapping[nn]) != s: - break - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(on, om, mapping[nn], mapping[nm]) != s: - break - else: - yield mapping - else: - yield from super().get_mapping(other, **kwargs) - - def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): - """ - Get sign of chiral tetrahedron atom for specified neighbors order - - :param n: stereo atom - :param env: neighbors order - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - s = self._atoms[n].stereo - if s is None: - raise KeyError - - order = self._stereo_tetrahedrons[n] - if len(order) == 3: - if len(env) == 4: # hydrogen atom passed to env - # hydrogen always last in order - try: - order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme - except StopIteration: - raise KeyError - elif len(env) != 3: # pyramid or tetrahedron expected - raise ValueError('invalid atoms list') - elif len(env) not in (3, 4): # pyramid or tetrahedron expected - raise ValueError('invalid atoms list') - - translate = tuple(order.index(x) for x in env[:3]) - if _tetrahedron_translate[translate]: - return not s - return s - - def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): - """ - Get sign for specified opposite neighbors - - :param n: first double bonded atom - :param m: last double bonded atom - :param nn: neighbor of first atom - :param nm: neighbor of last atom - :param s: if None, use existing sign else translate given to molecule - """ - try: - n0, n1, n2, n3 = 
self._stereo_cis_trans[(n, m)] - except KeyError: - n0, n1, n2, n3 = self._stereo_cis_trans[(m, n)] - n, m = m, n # in alkenes sign not order depended - nn, nm = nm, nn - - if s is None: - i, j = self._stereo_cis_trans_centers[n] - s = self._bonds[i][j].stereo - if s is None: - raise KeyError - - if nn == n0: # same start - t0 = 0 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n1: - t0 = 1 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: - t0 = 2 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: - t0 = 3 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - else: - raise KeyError - - if _alkene_translate[(t0, t1)]: - return not s - return s - - def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): - """ - get sign for specified opposite neighbors - - :param c: central double bonded atom - :param nn: neighbor of first double bonded atom - :param nm: neighbor of last double bonded atom - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - s = self._atoms[c].stereo - if s is None: - raise KeyError - - n0, n1, n2, n3 = self._stereo_allenes[c] - if nn == n0: # same start - t0 = 0 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n1: - t0 = 1 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: - t0 = 2 - if nm == n1: - t1 = 1 - 
elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: - t0 = 3 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - else: - raise KeyError - - if _alkene_translate[(t0, t1)]: - return not s - return s - - def _cumulenes(self: 'Container', heteroatoms=False): - atoms = self._atoms - bonds = self._bonds - - adj = defaultdict(set) # double bonds adjacency matrix - if heteroatoms: - for n, atom in atoms.items(): - if atom.atomic_number in _heteroatoms: - adj_n = adj[n].add - for m, bond in bonds[n].items(): - if int(bond) == 2 and atoms[m].atomic_number in _heteroatoms: - adj_n(m) - else: - for n, atom in atoms.items(): - if atom.atomic_number == 6: - adj_n = adj[n].add - for m, bond in bonds[n].items(): - if int(bond) == 2 and atoms[m].atomic_number == 6: - adj_n(m) - if not adj: - return () - - terminals = [x for x, y in adj.items() if len(y) == 1] - cumulenes = [] - while terminals: - n = terminals.pop(0) - m = adj[n].pop() - path = [n, m] - while m not in terminals: - adj_m = adj[m] - if len(adj_m) > 2: # not cumulene. SO3 etc. - cumulenes.extend(zip(path, path[1:])) # keep single double bonds. 
- break - adj_m.discard(n) - n, m = m, adj_m.pop() - path.append(m) - else: - terminals.remove(m) - adj[m].pop() - cumulenes.append(tuple(path)) - return cumulenes - - @cached_property - def _stereo_cumulenes(self: 'Container') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: - """ - Cumulenes which contains at least one non-hydrogen neighbor on both ends - """ - # 5 4 - # \ / - # 2---3 - # / \ - # 1 6 - bonds = self._bonds - atoms = self._atoms - cumulenes = {} - for path in self.cumulenes: - nf = bonds[path[0]] - nl = bonds[path[-1]] - n1, m1 = path[1], path[-2] - if any(b.order == 3 or atoms[m].atomic_number not in _organic_subset and b.order != 8 - for m, b in nf.items() if m != n1): - continue # skip X=C=C structures and metal-carbon complexes - if any(b.order == 3 or atoms[m].atomic_number not in _organic_subset and b.order != 8 - for m, b in nl.items() if m != m1): - continue # skip X=C=C structures and metal-carbon complexes - nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b.order != 8] - mn = [x for x, b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b.order != 8] - if nn and mn: - sn = nn[1] if len(nn) == 2 else None - sm = mn[1] if len(mn) == 2 else None - cumulenes[path] = (nn[0], mn[0], sn, sm) - return cumulenes - - @cached_property - def _stereo_tetrahedrons(self: 'Container') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, int]]]: - """ - Tetrahedrons which contains at least 3 non-hydrogen neighbors - """ - # 2 - # | - # 1--K--3 - # | - # 4? 
- atoms = self._atoms - bonds = self._bonds - tetrahedrons = {} - for n in self.tetrahedrons: - if any(atoms[x].atomic_number not in _organic_subset for x in bonds[n]): - continue # skip metal-carbon complexes - env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) - if len(env) in (3, 4): - tetrahedrons[n] = env - return tetrahedrons - - @cached_property - def _stereo_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: - """ - Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends - """ - stereo = {} - for path, env in self._stereo_cumulenes.items(): - if len(path) % 2: - continue - stereo[(path[0], path[-1])] = env - return stereo - - @cached_property - def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: - """ - Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. - """ - terminals = {} - for path in self._stereo_cumulenes: - if len(path) % 2: - continue - n, m = path[0], path[-1] - i = len(path) // 2 - terminals[n] = terminals[m] = (path[i - 1], path[i]) - return terminals - - @cached_property - def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: - """ - Cis-Trans terminal and central atoms to terminal pair mapping. 
- """ - terminals = {} - for path in self._stereo_cumulenes: - if len(path) % 2: - continue - n, m = path[0], path[-1] - i = len(path) // 2 - terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) - return terminals - - @cached_property - def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: - """ - Cis-Trans terminal atoms counterparts - """ - counterpart = {} - for path in self._stereo_cumulenes: - if len(path) % 2: - continue - n, m = path[0], path[-1] - counterpart[n] = m - counterpart[m] = n - return counterpart - - @cached_property - def _stereo_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: - """ - Allenes which contains at least one non-hydrogen neighbor on both ends - """ - return {path[len(path) // 2]: env for path, env in self._stereo_cumulenes.items() if len(path) % 2} - - @cached_property - def _stereo_allenes_centers(self) -> Dict[int, int]: - """ - Allene terminal atom to center mapping - """ - terminals = {} - for c, (n, m) in self._stereo_allenes_terminals.items(): - terminals[n] = terminals[m] = c - return terminals - - @cached_property - def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: - """ - Allene center atom to terminals mapping - """ - return {path[len(path) // 2]: (path[0], path[-1]) for path in self._stereo_cumulenes if len(path) % 2} - - -__all__ = ['Stereo'] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index fc2c7cb2..b7969687 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -285,7 +285,7 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} # check for full presence of cumulene chains and terminal attachments - for p in self._stereo_cumulenes.values(): + for p in self.stereogenic_cumulenes.values(): if not not_skin.issuperset(p): not_skin.difference_update(p) @@ -554,8 +554,6 @@ def unpack(cls, data: 
Union[bytes, memoryview], /, *, compressed=True, mol._allenes_stereo = allenes_stereo mol._cis_trans_stereo = cis_trans_stereo - mol._conformers = [] - mol._parsed_mapping = {} mol._MoleculeContainer__meta = None mol._MoleculeContainer__name = None mol._atoms = atoms = {} diff --git a/chython/containers/query.py b/chython/containers/query.py index 7a218786..757925f2 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -21,12 +21,11 @@ from .graph import Graph from ..algorithms.isomorphism import QueryIsomorphism from ..algorithms.smiles import QuerySmiles -from ..algorithms.stereo import Stereo from ..periodictable import Element, QueryElement from ..periodictable.base import Query -class QueryContainer(Stereo, Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): +class QueryContainer(Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): __slots__ = () def add_atom(self, atom: Union[Query, Element, int, str], *args, **kwargs): diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 82687724..61f2d6cd 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -175,8 +175,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if not stereo_atoms and not data['stereo_bonds']: return - st = molecule._stereo_tetrahedrons - sa = molecule._stereo_allenes + st = molecule.stereogenic_tetrahedrons + sa = molecule.stereogenic_allenes sat = molecule._stereo_allenes_terminals ctc = molecule._stereo_cis_trans_counterpart diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index a3504a0b..aaefb948 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -135,8 +135,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo or not data['stereo_atoms'] and not data['stereo_cumulenes'] and not data['stereo_allenes']: return - st = molecule._stereo_tetrahedrons - sa = 
molecule._stereo_allenes + st = molecule.stereogenic_tetrahedrons + sa = molecule.stereogenic_allenes ctc = molecule._stereo_cis_trans_counterpart stereo = [] diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 1d00a29b..70d1588e 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -21,10 +21,7 @@ from functools import cached_property from typing import Tuple, Type, List, Union, Optional from .element import Element - - -_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', - 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} +from .groups import GroupXVIII def _validate(value, prop): @@ -229,18 +226,15 @@ def atomic_symbol(self) -> str: return 'M' def __eq__(self, other): - if isinstance(other, Element): - if other.atomic_symbol in _inorganic: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - return True - # metal is subset of metal. 
only - return (isinstance(other, AnyMetal) - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization) + if not isinstance(other, Element): + return False + if other.is_forming_single_bonds or isinstance(other, GroupXVIII): + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + return True def __hash__(self): return hash((self.neighbors, self.hybridization)) @@ -257,35 +251,27 @@ def __eq__(self, other): """ Compare attached to molecules elements and query elements """ - if isinstance(other, Element): - if self.charge != other.charge: - return False - if self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if other.ring_sizes.isdisjoint(self.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected + if not isinstance(other, Element): + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + elif other.ring_sizes: # not in ring expected return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - # any is subset of any. 
only - return (isinstance(other, AnyElement) - and self.charge == other.charge - and self.is_radical == other.is_radical - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization - and self.ring_sizes == other.ring_sizes - and self.implicit_hydrogens == other.implicit_hydrogens - and self.heteroatoms == other.heteroatoms) + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True def __hash__(self): return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, @@ -329,42 +315,29 @@ def __eq__(self, other): """ Compare attached to molecules elements and query elements """ - if isinstance(other, Element): - if other.atomic_number not in self.atomic_numbers: - return False - if self.charge != other.charge: - return False - if self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if other.ring_sizes.isdisjoint(self.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected + if not isinstance(other, Element): + return False + if other.atomic_number not in self.atomic_numbers: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + elif other.ring_sizes: # not in ring expected return False - if self.heteroatoms and 
other.heteroatoms not in self.heteroatoms: - return False - return True - # List is subset of Any and List - elif (isinstance(other, (ListElement, AnyElement)) - and self.charge == other.charge - and self.is_radical == other.is_radical - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization - and self.ring_sizes == other.ring_sizes - and self.implicit_hydrogens == other.implicit_hydrogens - and self.heteroatoms == other.heteroatoms): - # list should contain all elements of other list - if isinstance(other, ListElement): - return set(self.atomic_numbers).issubset(other.atomic_numbers) - return True - return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True def __hash__(self): return hash((self.atomic_numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, @@ -475,47 +448,31 @@ def __eq__(self, other): """ compare attached to molecules elements and query elements """ - if isinstance(other, Element): - if self.atomic_number != other.atomic_number: - return False - if self.charge != other.charge: - return False - if self.is_radical != other.is_radical: - return False - if self.isotope and self.isotope != other.isotope: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if other.ring_sizes.isdisjoint(self.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected + if not isinstance(other, Element): + return False + if self.atomic_number != other.atomic_number: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.isotope and self.isotope != other.isotope: + return False + if self.neighbors and 
other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + elif other.ring_sizes: # not in ring expected return False - return True - elif (isinstance(other, ExtendedQuery) - and self.charge == other.charge - and self.is_radical == other.is_radical - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization - and self.ring_sizes == other.ring_sizes - and self.implicit_hydrogens == other.implicit_hydrogens - and self.heteroatoms == other.heteroatoms): - # query element should fully match other query element - if isinstance(other, QueryElement): - return self.atomic_number == other.atomic_number and self.isotope == other.isotope - # query element is subset of any element - elif isinstance(other, AnyElement): - return True - # query element should be in list - return isinstance(other, ListElement) and self.atomic_number in other.atomic_numbers - return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True def __hash__(self): return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, diff --git a/chython/reactor/base.py b/chython/reactor/base.py index 073713e4..16f8b918 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. 
# @@ -206,52 +206,52 @@ def __set_stereo(self, new, structure, mapping): for n, s in products._atoms_stereo.items(): m = mapping[n] new._atoms_stereo[m] = products._translate_tetrahedron_sign(n, [r_mapping[x] for x in - new._stereo_tetrahedrons[m]], s) + new.stereogenic_tetrahedrons[m]], s) stereo_override.add(m) for n, s in products._allenes_stereo.items(): m = mapping[n] - t1, t2, *_ = new._stereo_allenes[m] + t1, t2, *_ = new.stereogenic_allenes[m] new._allenes_stereo[m] = products._translate_allene_sign(n, r_mapping[t1], r_mapping[t2], s) stereo_override.add(m) for (n, m), s in products._cis_trans_stereo.items(): nm = (mapping[n], mapping[m]) try: - t1, t2, *_ = new._stereo_cis_trans[nm] + t1, t2, *_ = new.stereogenic_cis_trans[nm] except KeyError: nm = nm[::-1] - t2, t1, *_ = new._stereo_cis_trans[nm] + t2, t1, *_ = new.stereogenic_cis_trans[nm] new._cis_trans_stereo[nm] = products._translate_cis_trans_sign(n, m, r_mapping[t1], r_mapping[t2], s) stereo_override.update(nm) # set unmatched part stereo and not overridden by patch. 
for n, s in structure._atoms_stereo.items(): - if n in stereo_override or n not in new._stereo_tetrahedrons or \ + if n in stereo_override or n not in new.stereogenic_tetrahedrons or \ new._bonds[n].keys() != structure._bonds[n].keys(): # skip atoms with changed neighbors continue - new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new._stereo_tetrahedrons[n], s) + new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new.stereogenic_tetrahedrons[n], s) for n, s in structure._allenes_stereo.items(): - if n in stereo_override or n not in new._stereo_allenes or \ - set(new._stereo_allenes[n]) != set(structure._stereo_allenes[n]): + if n in stereo_override or n not in new.stereogenic_allenes or \ + set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): # skip changed allenes continue - t1, t2, *_ = new._stereo_allenes[n] + t1, t2, *_ = new.stereogenic_allenes[n] new._allenes_stereo[n] = structure._translate_allene_sign(n, t1, t2, s) for nm, s in structure._cis_trans_stereo.items(): n, m = nm if n in stereo_override or m in stereo_override: continue - env = structure._stereo_cis_trans[nm] + env = structure.stereogenic_cis_trans[nm] try: - new_env = new._stereo_cis_trans[nm] + new_env = new.stereogenic_cis_trans[nm] except KeyError: nm = nm[::-1] try: - new_env = new._stereo_cis_trans[nm] + new_env = new.stereogenic_cis_trans[nm] except KeyError: continue t2, t1, *_ = new_env diff --git a/chython/utils/rdkit.py b/chython/utils/rdkit.py index 826387f6..bae12fd9 100644 --- a/chython/utils/rdkit.py +++ b/chython/utils/rdkit.py @@ -152,7 +152,7 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): for nm, s in data._cis_trans_stereo.items(): n, m = nm if m in bonds[n]: # cumulenes unsupported - nn, nm, *_ = data._stereo_cis_trans[nm] + nn, nm, *_ = data.stereogenic_cis_trans[nm] b = mol.GetBondBetweenAtoms(mapping[n], mapping[m]) b.SetStereoAtoms(mapping[nn], mapping[nm]) b.SetStereo(_cis if s else _trans) From 
1bf7a687b649fd5361c0c4ecff2c3ab69c400578 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:42:13 +0100 Subject: [PATCH 23/68] fixes --- chython/algorithms/isomorphism.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 8c0de0a5..65a20c0e 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -280,25 +280,25 @@ def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, # _cython - by default cython implementation enabled. # disable it by overriding method if Query Atoms or Containers logic changed. # Lv, Ts and Og in cython optimized mode treated as equal. - if isinstance(other, QueryIsomorphism): - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) - elif isinstance(other, MoleculeIsomorphism): - if _cython: - try: # windows? ;) - from ._isomorphism import get_mapping as _cython_get_mapping - except ImportError: - components = get_mapping = None - else: - components = self._cython_compiled_query # override to cython data + if not isinstance(other, MoleculeIsomorphism): + raise TypeError('MoleculeContainer expected') - def get_mapping(query, scope): - return _cython_get_mapping(*query, *other._cython_compiled_structure, - array('I', [n in scope for n in other])) - else: + if _cython: + try: # windows? 
;) + from ._isomorphism import get_mapping as _cython_get_mapping + except ImportError: components = get_mapping = None - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, - components=components, get_mapping=get_mapping) - raise TypeError('MoleculeContainer or QueryContainer expected') + else: + components = self._cython_compiled_query # override to cython data + + def get_mapping(query, scope): + return _cython_get_mapping(*query, *other._cython_compiled_structure, + array('I', [n in scope for n in other])) + else: + components = get_mapping = None + # todo: implement stereo + return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, + components=components, get_mapping=get_mapping) @cached_property def _cython_compiled_query(self): From faf88ac28f27ac7840471eba39f98510023e1773 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:56:43 +0100 Subject: [PATCH 24/68] smiles parser fixed --- chython/files/daylight/smiles.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 61f2d6cd..410df35a 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -175,6 +175,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if not stereo_atoms and not data['stereo_bonds']: return + atoms = molecule._atoms st = molecule.stereogenic_tetrahedrons sa = molecule.stereogenic_allenes sat = molecule._stereo_allenes_terminals @@ -182,10 +183,11 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): order = {mapping[n]: [mapping[m] for m in ms] for n, ms in data['order'].items()} + log = [] stereo = [] for i, s in stereo_atoms: n = mapping[i] - if not i and hydrogens[n]: # first atom in smiles has reversed chiral mark + if not i and atoms[n].implicit_hydrogens: # first atom in smiles has reversed chiral mark s = not s if 
n in st: @@ -196,6 +198,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): n1 = next(x for x in order[t1] if x in env) n2 = next(x for x in order[t2] if x in env) stereo.append((molecule.add_atom_stereo, n, (n1, n2), s)) + else: + log.append(f'non chiral atom {n} has stereo label in smiles') stereo_bonds = {mapping[n]: {mapping[m]: s for m, s in ms.items()} for n, ms in data['stereo_bonds'].items()} From b3fa72ece43fccfaf31e620df92909679a7a23f5 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 23:08:47 +0100 Subject: [PATCH 25/68] smiles generator fixed --- chython/algorithms/smiles.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index bbd43dfa..4f2e14ae 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -448,7 +448,7 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): smi[2] = atom.atomic_symbol return ''.join(smi) - def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): + def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' order = self._bonds[n][m].order @@ -475,14 +475,14 @@ def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): else: # order == 8 return '~' - def __ct_map(self, adjacency): + def __ct_map(self: 'MoleculeContainer', adjacency): stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} if not stereo_bonds: return {} ct_map = {} + sct = self.stereogenic_cis_trans ctc = self._stereo_cis_trans_centers ctt = self._stereo_cis_trans_terminals - sct = self._stereo_cis_trans ctcp = self._stereo_cis_trans_counterpart seen = set() From 57ef18d0d277992e9cb57c48a3602626021b5b46 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 11:02:03 +0100 Subject: [PATCH 26/68] morgan and rings refactored. 
no need for queries. dropped. --- chython/algorithms/isomorphism.py | 66 +++++++++++++++---------------- chython/algorithms/morgan.py | 6 +-- chython/algorithms/rings.py | 8 ++-- chython/algorithms/smiles.py | 14 +++++-- chython/containers/graph.py | 29 ++------------ chython/containers/molecule.py | 30 ++++++++++++-- 6 files changed, 82 insertions(+), 71 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 65a20c0e..a6ddea3e 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: from chython.containers.graph import Graph - from chython.containers import MoleculeContainer, QueryContainer + from chython.containers import MoleculeContainer class Isomorphism: @@ -49,14 +49,6 @@ def __gt__(self, other): def __ge__(self, other): return other.is_substructure(self) - def __contains__(self: 'Graph', other: Union[Element, Query, str]): - """ - Atom in Structure test. - """ - if isinstance(other, str): - return any(other == x.atomic_symbol for x in self._atoms.values()) - return any(other == x for x in self._atoms.values()) - def is_substructure(self, other, /) -> bool: """ Test self is substructure of other @@ -79,23 +71,7 @@ def is_equal(self, other, /) -> bool: return False return True - def is_automorphic(self): - """ - Test for automorphism symmetry of graph. - """ - try: - next(self.get_automorphism_mapping()) - except StopIteration: - return False - return True - - def get_automorphism_mapping(self: 'Graph') -> Iterator[Dict[int, int]]: - """ - Iterator of all possible automorphism mappings. 
- """ - return _get_automorphism_mapping(self.atoms_order, self._bonds) - - def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=None, + def _get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter=True, searching_scope=None, components=None, get_mapping=None) -> Iterator[Dict[int, int]]: if components is None: # ad-hoc for QueryContainer components, closures = self._compiled_query @@ -141,14 +117,36 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No @cached_property def _compiled_query(self: 'Graph'): - components, closures = _compile_query(self._atoms, self._bonds) - if self.connected_components_count > 1: - order = {x: n for n, c in enumerate(self.connected_components) for x in c} - components.sort(key=lambda x: order[x[0][0]]) - return components, closures + return _compile_query(self._atoms, self._bonds) class MoleculeIsomorphism(Isomorphism): + __slots__ = () + + def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): + """ + Atom in Structure test. + """ + if isinstance(other, str): + return any(other == x.atomic_symbol for x in self._atoms.values()) + return any(other == x for x in self._atoms.values()) + + def is_automorphic(self): + """ + Test for automorphism symmetry of graph. + """ + try: + next(self.get_automorphism_mapping()) + except StopIteration: + return False + return True + + def get_automorphism_mapping(self: 'MoleculeContainer') -> Iterator[Dict[int, int]]: + """ + Iterator of all possible automorphism mappings. 
+ """ + return _get_automorphism_mapping(self.atoms_order, self._bonds) + def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None): """ @@ -163,7 +161,7 @@ def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: boo raise TypeError('MoleculeContainer expected') @cached_property - def _cython_compiled_structure(self): + def _cython_compiled_structure(self: 'MoleculeContainer'): # long I: # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit @@ -268,7 +266,9 @@ def _cython_compiled_structure(self): class QueryIsomorphism(Isomorphism): - def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, automorphism_filter: bool = True, + __slots__ = () + + def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None, _cython=True): """ Get Query to Molecule substructure mapping generator. diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index 36086ada..e200cbc3 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -27,14 +27,14 @@ if TYPE_CHECKING: - from chython.containers.graph import Graph + from chython.containers import MoleculeContainer class Morgan: __slots__ = () @cached_property - def atoms_order(self: 'Graph') -> Dict[int, int]: + def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: """ Morgan like algorithm for graph nodes ordering @@ -48,7 +48,7 @@ def atoms_order(self: 'Graph') -> Dict[int, int]: return _morgan({n: hash((hash(a), n in ring)) for n, a in self._atoms.items()}, self.int_adjacency) @cached_property - def int_adjacency(self: 'Graph') -> Dict[int, Dict[int, int]]: + def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: """ Adjacency with integer-coded bonds. 
""" diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 37cde6dc..4871d5fa 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: - from chython.containers.graph import Graph + from chython.containers import MoleculeContainer class Rings: @@ -111,7 +111,7 @@ def rings_count(self) -> int: return sum(len(x) for x in bonds.values()) // 2 - len(bonds) + len(_connected_components(bonds)) @cached_property - def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: + def not_special_connectivity(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ Graph connectivity without special bonds. """ @@ -124,7 +124,7 @@ def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: return bonds @cached_property - def connected_components(self: 'Graph') -> List[Set[int]]: + def connected_components(self: 'MoleculeContainer') -> List[Set[int]]: """ Isolated components of single graph. E.g. salts as ion pair. """ @@ -138,7 +138,7 @@ def connected_components_count(self) -> int: return len(self.connected_components) @cached_property - def skin_graph(self: 'Graph') -> Dict[int, Set[int]]: + def skin_graph(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ Graph without terminal atoms. Only rings and linkers """ diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 4f2e14ae..fc0e7d01 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -322,8 +322,9 @@ def _format_atom(self, n, adjacency, **kwargs): def _format_bond(self, n, m, adjacency, **kwargs): ... - def _smiles_order(self: 'Graph', stereo=True) -> Callable: - return self.atoms_order.__getitem__ + @abstractmethod + def _smiles_order(self, stereo=True) -> Callable: + ... def _format_cxsmiles(self, order) -> Optional[str]: ... 
@@ -375,7 +376,7 @@ def sticky_smiles(self: Union['MoleculeContainer', 'MoleculeSmiles'], left: int, smiles = smiles[2:] return ''.join(smiles) - def _smiles_order(self: 'MoleculeContainer', stereo=True) -> Callable: + def _smiles_order(self: 'MoleculeContainer', stereo=True): if stereo: return self._chiral_morgan.__getitem__ else: @@ -527,6 +528,9 @@ def __ct_map(self: 'MoleculeContainer', adjacency): class CGRSmiles(Smiles): __slots__ = () + def _smiles_order(self: 'CGRContainer', stereo=True): + return self.atoms_order.__getitem__ + def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): atom = self._atoms[n] if atom.isotope: @@ -552,6 +556,10 @@ def _format_bond(self: 'CGRContainer', n, m, adjacency, **kwargs): class QuerySmiles(Smiles): __slots__ = () + def _smiles_order(self: 'QueryContainer', stereo=True): + # try to keep atoms order + return {n: i for i, n in enumerate(self._atoms)}.__getitem__ + def _format_cxsmiles(self: 'QueryContainer', order): hh = ['atomProp'] cx = [] diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 7fa5dead..4586969e 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -19,8 +19,6 @@ from abc import ABC, abstractmethod from functools import cached_property from typing import Dict, Generic, Iterator, Optional, Tuple, TypeVar -from ..algorithms.morgan import Morgan -from ..algorithms.rings import Rings from ..exceptions import AtomNotFound, MappingError, BondNotFound @@ -28,7 +26,7 @@ Bond = TypeVar('Bond') -class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): +class Graph(Generic[Atom, Bond], ABC): __slots__ = ('_atoms', '_bonds', '__dict__') __class_cache__ = {} @@ -101,7 +99,7 @@ def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: self._atoms[n] = atom self._bonds[n] = {} - self.flush_cache(keep_sssr=True) + self.flush_cache() return n @abstractmethod @@ -169,27 +167,8 @@ def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): 
u._bonds.update(other._bonds) return u - def flush_cache(self, *, keep_sssr=False, keep_components=False): - backup = {} - if keep_sssr: - # good to keep if no new bonds or bonds deletions or bonds to/from any change - if 'sssr' in self.__dict__: - backup['sssr'] = self.sssr - if 'atoms_rings' in self.__dict__: - backup['atoms_rings'] = self.atoms_rings - if 'atoms_rings_sizes' in self.__dict__: - backup['atoms_rings_sizes'] = self.atoms_rings_sizes - if 'ring_atoms' in self.__dict__: - backup['ring_atoms'] = self.ring_atoms - if 'not_special_connectivity' in self.__dict__: - backup['not_special_connectivity'] = self.not_special_connectivity - if 'rings_count' in self.__dict__: - backup['rings_count'] = self.rings_count - if keep_components: - # good to keep if no new bonds or bonds deletions - if 'connected_components' in self.__dict__: - backup['connected_components'] = self.connected_components - self.__dict__ = backup + def flush_cache(self): + self.__dict__.clear() def __copy__(self): return self.copy() diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index b7969687..f80a453d 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -32,6 +32,8 @@ from ..algorithms.isomorphism import MoleculeIsomorphism from ..algorithms.fingerprints import Fingerprints from ..algorithms.mcs import MCS +from ..algorithms.morgan import Morgan +from ..algorithms.rings import Rings from ..algorithms.smiles import MoleculeSmiles from ..algorithms.standardize import StandardizeMolecule from ..algorithms.stereo import MoleculeStereo @@ -41,9 +43,9 @@ from ..periodictable import DynamicElement, Element, QueryElement, H -class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, - MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, - X3domMolecule): +class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], Morgan, Rings, MoleculeIsomorphism, 
+ Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, + Fingerprints, Tautomers, MCS, X3domMolecule): __slots__ = ('_meta', '_name', '_conformers', '_changed', '_backup') def __init__(self): @@ -823,6 +825,28 @@ def check_implicit(self, n: int, h: int) -> bool: return True return False + def flush_cache(self, *, keep_sssr=False, keep_components=False): + backup = {} + if keep_sssr: + # good to keep if no new bonds or bonds deletions or bonds to/from any change + if 'sssr' in self.__dict__: + backup['sssr'] = self.sssr + if 'atoms_rings' in self.__dict__: + backup['atoms_rings'] = self.atoms_rings + if 'atoms_rings_sizes' in self.__dict__: + backup['atoms_rings_sizes'] = self.atoms_rings_sizes + if 'ring_atoms' in self.__dict__: + backup['ring_atoms'] = self.ring_atoms + if 'not_special_connectivity' in self.__dict__: + backup['not_special_connectivity'] = self.not_special_connectivity + if 'rings_count' in self.__dict__: + backup['rings_count'] = self.rings_count + if keep_components: + # good to keep if no new bonds or bonds deletions + if 'connected_components' in self.__dict__: + backup['connected_components'] = self.connected_components + self.__dict__ = backup + def __int__(self): """ Total charge of molecule From 38e0bd1a409e50e09679bf6e0eae984303d246eb Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 11:16:55 +0100 Subject: [PATCH 27/68] optimizations added --- chython/containers/molecule.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index f80a453d..d56c122d 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -237,13 +237,22 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): self.fix_structure() self.fix_stereo() - def copy(self) -> 'MoleculeContainer': + def copy(self, *, keep_sssr=False, keep_components=False) -> 'MoleculeContainer': 
copy = super().copy() copy._name = self._name if self._meta is None: copy._meta = None else: copy._meta = self._meta.copy() + + if keep_sssr: + for k, v in self.__dict__.items(): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', + 'ring_atoms', 'not_special_connectivity', 'rings_count'): + copy.__dict__[k] = v + if keep_components: + if 'connected_components' in self.__dict__: + copy.__dict__['connected_components'] = self.connected_components return copy def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = True) -> 'MoleculeContainer': @@ -829,18 +838,10 @@ def flush_cache(self, *, keep_sssr=False, keep_components=False): backup = {} if keep_sssr: # good to keep if no new bonds or bonds deletions or bonds to/from any change - if 'sssr' in self.__dict__: - backup['sssr'] = self.sssr - if 'atoms_rings' in self.__dict__: - backup['atoms_rings'] = self.atoms_rings - if 'atoms_rings_sizes' in self.__dict__: - backup['atoms_rings_sizes'] = self.atoms_rings_sizes - if 'ring_atoms' in self.__dict__: - backup['ring_atoms'] = self.ring_atoms - if 'not_special_connectivity' in self.__dict__: - backup['not_special_connectivity'] = self.not_special_connectivity - if 'rings_count' in self.__dict__: - backup['rings_count'] = self.rings_count + for k, v in self.__dict__.items(): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', + 'ring_atoms', 'not_special_connectivity', 'rings_count'): + backup[k] = v if keep_components: # good to keep if no new bonds or bonds deletions if 'connected_components' in self.__dict__: @@ -884,7 +885,7 @@ def __enter__(self): """ Transaction of changes. Keep current state for restoring on errors. 
""" - self._backup = self.copy() + self._backup = self.copy(keep_sssr=True, keep_components=True) return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -894,7 +895,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._bonds = backup._bonds self._meta = backup._meta self._name = backup._name - self.flush_cache() + self.__dict__ = backup.__dict__ else: # update internal state self.fix_structure() self.fix_stereo() From bdef5809ff6ec805b5f08637d2e7d6ca560d04a1 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 11:37:46 +0100 Subject: [PATCH 28/68] tautomers refactored --- chython/algorithms/aromatics/kekule.py | 2 +- chython/algorithms/tautomers/__init__.py | 104 +++-------------- chython/algorithms/tautomers/acid_base.py | 111 +++++++++++-------- chython/algorithms/tautomers/heteroarenes.py | 22 ++-- chython/algorithms/tautomers/keto_enol.py | 35 +++--- 5 files changed, 108 insertions(+), 166 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index f7d90918..6848638c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -62,7 +62,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): """ self.__fix_rings() # fix bad aromatic rings for form in self.__kekule_full(0): - copy = self.copy() + copy = self.copy(keep_sssr=True, keep_components=True) bonds = copy._bonds atoms = set() for n, m, b in form: diff --git a/chython/algorithms/tautomers/__init__.py b/chython/algorithms/tautomers/__init__.py index 7a628c6d..e180eaef 100644 --- a/chython/algorithms/tautomers/__init__.py +++ b/chython/algorithms/tautomers/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # Copyright 2020 Nail Samikaev # This file is part of chython. 
# @@ -51,47 +51,25 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar """ if limit < 1: raise ValueError('limit should be greater or equal 1') - - has_stereo = bool(self._atoms_stereo or self._allenes_stereo or self._cis_trans_stereo) counter = 0 - copy = self.copy() - copy.clean_stereo() - # sssr, neighbors and heteroatoms are same for all tautomers. - # prevent recalculation by sharing cache. - self.__set_cache(copy) + copy = self.copy(keep_sssr=True, keep_components=True) if prepare_molecules: # transform to kekule form without hydrogens - k = copy.kekule() - i = copy.implicify_hydrogens(_fix_stereo=False) - if k or i: # reset cache after flush - self.__set_cache(copy) - - thiele = copy.copy() # transform to thiele to prevent duplicates and dearomatization - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): - self.__set_cache(thiele) - - # return origin structure as first tautomer - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + copy.kekule() + copy.implicify_hydrogens(_fix_stereo=False) + + # transform to thiele to prevent duplicates and dearomatization + thiele = copy.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) + yield thiele # return original structure as first tautomer seen = {thiele: None} # value is parent molecule - required for preventing migrations in sugars. 
# first try to neutralize if copy.neutralize(_fix_stereo=False): # found neutral form - thiele = copy.copy() - self.__set_cache(copy) # restore cache - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): - self.__set_cache(thiele) - - # return found neutral form - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + thiele = copy.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) + yield thiele counter += 1 seen[thiele] = None @@ -107,11 +85,8 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current, thiele_current = queue.popleft() for mol, ket in current._enumerate_keto_enol_tautomers(partial): - thiele = mol.copy() - self.__set_cache(mol) - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): # reset cache after flush_cache. - self.__set_cache(thiele) + thiele = mol.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) if thiele not in seen: seen[thiele] = current @@ -124,10 +99,7 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar queue = deque([(mol, thiele)]) new_queue = [thiele] copy = mol # new entry point. - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + yield thiele break if keep_sugars and current is not copy and ket: # prevent carbonyl migration in sugars. skip entry point. 
@@ -138,10 +110,7 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar queue.append((mol, thiele)) new_queue.append(thiele) - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + yield thiele counter += 1 if counter == limit: return @@ -152,15 +121,11 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current = queue.popleft() for mol in current._enumerate_hetero_arene_tautomers(): - self.__set_cache(mol) if mol not in seen: seen[mol] = None queue.append(mol) new_queue.append(mol) # new hetero-arenes also should be included to this list. - if has_stereo: - yield self.__set_stereo(mol.copy()) - else: - yield mol + yield mol counter += 1 if counter == limit: return @@ -171,14 +136,10 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current = queue.popleft() for mol in current._enumerate_zwitter_tautomers(): - self.__set_cache(mol) if mol not in seen: seen[mol] = None queue.append(mol) - if has_stereo: - yield self.__set_stereo(mol.copy()) - else: - yield mol + yield mol counter += 1 if counter == limit: return @@ -206,34 +167,5 @@ def enumerate_charged_tautomers(self: 'MoleculeContainer', *, prepare_molecules= if count == limit: return - def __set_cache(self: 'MoleculeContainer', mol): - try: - neighbors = self.__dict__['__cached_args_method_neighbors'] - except KeyError: - neighbors = self.__dict__['__cached_args_method_neighbors'] = {} - try: - heteroatoms = self.__dict__['__cached_args_method_heteroatoms'] - except KeyError: - heteroatoms = self.__dict__['__cached_args_method_heteroatoms'] = {} - try: - is_ring_bond = self.__dict__['__cached_args_method_is_ring_bond'] - except KeyError: - is_ring_bond = self.__dict__['__cached_args_method_is_ring_bond'] = {} - - mol.__dict__['sssr'] = self.sssr # thiele/kekule - mol.__dict__['ring_atoms'] = self.ring_atoms # morgan - mol.__dict__['_connected_components'] = 
self._connected_components # isomorphism - mol.__dict__['atoms_rings_sizes'] = self.atoms_rings_sizes # isomorphism - mol.__dict__['__cached_args_method_neighbors'] = neighbors # isomorphism - mol.__dict__['__cached_args_method_heteroatoms'] = heteroatoms # isomorphism - mol.__dict__['__cached_args_method_is_ring_bond'] = is_ring_bond # isomorphism - - def __set_stereo(self: 'MoleculeContainer', mol): - mol._atoms_stereo.update(self._atoms_stereo) - mol._allenes_stereo.update(self._allenes_stereo) - mol._cis_trans_stereo.update(self._cis_trans_stereo) - mol.fix_stereo() - return mol - __all__ = ['Tautomers'] diff --git a/chython/algorithms/tautomers/acid_base.py b/chython/algorithms/tautomers/acid_base.py index bb1a672f..c901cbcd 100644 --- a/chython/algorithms/tautomers/acid_base.py +++ b/chython/algorithms/tautomers/acid_base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -44,9 +44,8 @@ def neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, return [] return False - self._charges.update(mol._charges) - self._hydrogens.update(mol._hydrogens) - self.flush_cache() + self._atoms.update(mol._atoms) + self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() if logging: @@ -85,14 +84,16 @@ def enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: continue uniq.add(dc) seen_combo.add((dc, ac)) - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) for n in ac: - mol._hydrogens[n] += 1 - mol._charges[n] += 1 + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for n in dc: if n is not None: - mol._hydrogens[n] -= 1 - mol._charges[n] -= 1 + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 if mol not in seen: seen.add(mol) yield mol @@ -109,15 +110,17 @@ def 
enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: uniq.add(ac) if (dc, ac) in seen_combo: continue - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) for n in ac: if n is not None: - mol._hydrogens[n] += 1 - mol._charges[n] += 1 + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for n in dc: if n is not None: - mol._hydrogens[n] -= 1 - mol._charges[n] -= 1 + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 if mol not in seen: seen.add(mol) yield mol @@ -139,44 +142,52 @@ def _neutralize(self: 'MoleculeContainer', keep_charge=True): if not donors or not acceptors: return # neutralization impossible elif len(donors) > len(acceptors): - copy = self.copy() - for a in acceptors: - copy._hydrogens[a] += 1 - copy._charges[a] += 1 + copy = self.copy(keep_sssr=True, keep_components=True) + for n in acceptors: + a = copy._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for c in combinations(donors, len(acceptors)): - mol = copy.copy() - for d in c: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 + mol = copy.copy(keep_sssr=True, keep_components=True) + for n in c: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 yield mol, acceptors.union(c) elif len(donors) < len(acceptors): - copy = self.copy() - for d in donors: - copy._hydrogens[d] -= 1 - copy._charges[d] -= 1 + copy = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = copy._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 for c in combinations(acceptors, len(donors)): - mol = copy.copy() - for a in c: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = copy.copy(keep_sssr=True, keep_components=True) + for n in c: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors.union(c) else: # balanced! 
- mol = self.copy() - for d in donors: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 - for a in acceptors: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 + for n in acceptors: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors | acceptors elif donors or acceptors: - mol = self.copy() - for d in donors: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 - for a in acceptors: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 + for n in acceptors: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors | acceptors def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): @@ -190,11 +201,13 @@ def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): acceptors.add(mapping[1]) for d, a in product(donors, acceptors): - mol = self.copy() - mol._hydrogens[d] -= 1 - mol._hydrogens[a] += 1 - mol._charges[d] -= 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + d = mol._atoms[d] + a = mol._atoms[a] + d._implicit_hydrogens -= 1 + a._implicit_hydrogens += 1 + d._charge -= 1 + a._charge += 1 yield mol diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 81837438..3e6ac345 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -33,9 +33,6 @@ class HeteroArenes: def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds - hydrogens = self._hydrogens - charges = self._charges - radicals = self._radicals rings = defaultdict(list) # aromatic skeleton for n, m_bond in bonds.items(): @@ -49,19 +46,20 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): donors = set() single_bonded = set() for n, ms in rings.items(): + a = atoms[n] if len(ms) == 2: - if atoms[n].atomic_number in (5, 7, 15): - if not charges[n] and not radicals[n]: + if a.atomic_number in (5, 7, 15): + if not a.charge and not a.is_radical: # only neutral B, N, P - if hydrogens[n]: # pyrrole + if a.implicit_hydrogens: # pyrrole donors.add(n) elif len(bonds[n]) == 2: # pyridine acceptors.add(n) else: single_bonded.add(n) - elif charges[n] == -1 and atoms[n].atomic_number == 6: # ferrocene + elif a.charge == -1 and a.atomic_number == 6: # ferrocene single_bonded.add(n) - elif len(ms) == 3 and atoms[n].atomic_number in (5, 7, 15) and not charges[n] and not radicals[n]: + elif len(ms) == 3 and a.atomic_number in (5, 7, 15) and not a.charge and not a.is_radical: single_bonded.add(n) if not donors or not acceptors: return @@ -94,9 +92,9 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): next(_kekule_component(component, sb, (), 0)) except InvalidAromaticRing: continue - mol = self.copy() - mol._hydrogens[d] = 0 - mol._hydrogens[a] = 1 + mol = self.copy(keep_sssr=True, keep_components=True) + mol._atoms[d]._implicit_hydrogens = 0 + mol._atoms[a]._implicit_hydrogens = 1 yield mol diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index acad2241..f9fd582b 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 
2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -39,13 +39,13 @@ def _enumerate_keto_enol_tautomers(self: Union['MoleculeContainer', 'KetoEnol'], a = fix[0][0] d = fix[-1][1] - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) m_bonds = mol._bonds for n, m, b in fix: - m_bonds[n][m]._Bond__order = b + m_bonds[n][m]._order = b - mol._hydrogens[a] += 1 - mol._hydrogens[d] -= 1 + mol._atoms[a]._implicit_hydrogens += 1 + mol._atoms[d]._implicit_hydrogens -= 1 yield mol, ket @cached_property @@ -59,8 +59,6 @@ def _sugar_groups(self): def __enumerate_bonds(self: 'MoleculeContainer', partial): atoms = self._atoms bonds = self._bonds - hydrogens = self._hydrogens - hybridization = self.hybridization rings = self.atoms_rings_sizes # search neutral oxygen and nitrogen @@ -83,11 +81,12 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): if partial and path and not len(path) % 2 and \ (hydrogen or # enol > ketone - hydrogens[(x := path[-1][1])] and (x not in rings or all(x > 7 for x in rings[x]))): # ketone> + atoms[(x := path[-1][1])].implicit_hydrogens and + (x not in rings or all(x > 7 for x in rings[x]))): # ketone> # return partial hops. ignore allenes in small rings. yield path, hydrogen if len(path) > depth: # fork found - if not partial and not len(path) % 2 and (hydrogen or hydrogens[path[-1][1]]): + if not partial and not len(path) % 2 and (hydrogen or atoms[path[-1][1]].implicit_hydrogens): # end of path found. return it and start new one. 
yield path, hydrogen seen.difference_update(x for _, x, _ in path[depth:]) @@ -110,32 +109,32 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): continue elif n in anti: # enol-ketone switch if current in anti[n]: - if hydrogens: - if b.order == 2: + if hydrogen: + if b == 2: cp = path.copy() cp.append((current, n, 1)) yield cp, True - elif b.order == 1: + elif b == 1: cp = path.copy() cp.append((current, n, 2)) yield cp, False - elif b.order == bond and atoms[n].atomic_number == 6: # classic keto-enol route - hb = hybridization(n) - if hb == 2: # grow up + elif b.order == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route + if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: - if hb == 3: # OC=CC=C=C case + if a.hybridization == 3: # OC=CC=C=C case cp = path.copy() cp.append((current, n, 1)) yield cp, True # ketone found - elif hb == 1 and hydrogens[n]: # ketone >> enol + elif a.hybridization == 1 and a.implicit_hydrogens: # ketone >> enol cp = path.copy() cp.append((current, n, 2)) yield cp, False if path and not len(path) % 2 and \ (hydrogen or # enol > ketone - hydrogens[(x := path[-1][1])] and (x not in rings or all(x > 7 for x in rings[x]))): + atoms[(x := path[-1][1])].implicit_hydrogens and + (x not in rings or all(x > 7 for x in rings[x]))): yield path, hydrogen From 109c8de189a2af6ac5c423a73fc232a34cb7b54b Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 16:36:08 +0100 Subject: [PATCH 29/68] fixes --- chython/algorithms/tautomers/acid_base.py | 2 +- chython/algorithms/tautomers/heteroarenes.py | 2 +- chython/algorithms/tautomers/keto_enol.py | 23 +++++++++++--------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/chython/algorithms/tautomers/acid_base.py b/chython/algorithms/tautomers/acid_base.py index c901cbcd..4323b0c8 100644 --- a/chython/algorithms/tautomers/acid_base.py +++ b/chython/algorithms/tautomers/acid_base.py @@ -44,7 +44,7 @@ def 
neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, return [] return False - self._atoms.update(mol._atoms) + self._atoms = mol._atoms self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 3e6ac345..4115d6a3 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -37,7 +37,7 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): rings = defaultdict(list) # aromatic skeleton for n, m_bond in bonds.items(): for m, bond in m_bond.items(): - if bond.order == 4: + if bond == 4: rings[n].append(m) if not rings: return diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index f9fd582b..ddcd14d7 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -44,8 +44,12 @@ def _enumerate_keto_enol_tautomers(self: Union['MoleculeContainer', 'KetoEnol'], for n, m, b in fix: m_bonds[n][m]._order = b - mol._atoms[a]._implicit_hydrogens += 1 - mol._atoms[d]._implicit_hydrogens -= 1 + a = mol._atoms[a] + d = mol._atoms[d] + a._implicit_hydrogens += 1 + d._implicit_hydrogens -= 1 + a._hybridization -= 1 # -C=X>=C-X or -C=C=X>=C-C=X + d._hybridization += 1 yield mol, ket @cached_property @@ -108,17 +112,16 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): elif n in seen: # aromatic ring destruction. 
pyridine double bonds shift continue elif n in anti: # enol-ketone switch - if current in anti[n]: + if current in anti[n]: # keton or enol bond if hydrogen: - if b == 2: - cp = path.copy() - cp.append((current, n, 1)) - yield cp, True - elif b == 1: cp = path.copy() - cp.append((current, n, 2)) + cp.append((current, n, 1)) # double to single in keton end + yield cp, True + else: + cp = path.copy() + cp.append((current, n, 2)) # single to double in enol end yield cp, False - elif b.order == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route + elif b == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: From d52d062620e58bf9761cd4662504665877e4c665 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 18:47:20 +0100 Subject: [PATCH 30/68] bond assessment streamlined through operator overloading --- chython/algorithms/aromatics/kekule.py | 9 +++--- chython/algorithms/aromatics/thiele.py | 5 ++-- chython/algorithms/depict.py | 7 ++--- chython/algorithms/isomorphism.py | 14 ++++----- chython/algorithms/smiles.py | 12 ++++---- chython/algorithms/standardize/molecule.py | 12 ++++---- chython/algorithms/standardize/resonance.py | 4 +-- chython/algorithms/x3dom.py | 7 ++--- chython/containers/bonds.py | 6 ++-- chython/containers/molecule.py | 33 ++++++++++----------- chython/files/_convert.py | 4 +-- 11 files changed, 52 insertions(+), 61 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 6848638c..13905644 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -113,12 +113,11 @@ def __prepare_rings(self: 'MoleculeContainer'): triple_bonded = set() for n, m_bond in bonds.items(): for m, bond in m_bond.items(): - bo = bond.order - if bo == 4: + if bond == 4: rings[n].append(m) - elif bo == 2: + elif bond == 2: 
double_bonded[n].append(m) - elif bo == 3: + elif bond == 3: triple_bonded.add(n) if not rings: @@ -160,7 +159,7 @@ def __prepare_rings(self: 'MoleculeContainer'): if m not in seen: rings[n].remove(m) rings[m].remove(n) - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 if any(len(ms) not in (2, 3) for ms in rings.values()): raise InvalidAromaticRing('not in ring aromatic bond or hypercondensed rings: ' diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index f236e887..c8034bcb 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -127,8 +127,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: return False # check out-of-ring double bonds - double_bonded = {n for n in rings if any(m not in rings[n] and b.order == 2 - for m, b in bonds[n].items())} + double_bonded = {n for n in rings if any(m not in rings[n] and b == 2 for m, b in bonds[n].items())} # fix_tautomers if fix_tautomers and acceptors and donors: @@ -157,7 +156,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: seen.add(current) new_order = 1 if order == 2 else 2 stack.extend((current, n, depth, new_order) for n in rings[current] if - n not in seen and n not in double_bonded and bonds[current][n].order == order) + n not in seen and n not in double_bonded and bonds[current][n] == order) else: # path not found continue for n, m, o in path: diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index a48eb6c7..73cf2319 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -271,17 +271,16 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): for n, m, bond in self.bonds(): if m in wedge[n]: continue - order = bond.order nx, ny = atoms[n].xy mx, my = atoms[m].xy ny, my = -ny, -my - if order in (1, 4): + if bond in (1, 4): svg.append(f' ') - elif order == 2: + elif bond == 2: dx, dy = _rotate_vector(0, 
double_space, mx - nx, ny - my) svg.append(f' ') svg.append(f' ') - elif order == 3: + elif bond == 3: dx, dy = _rotate_vector(0, triple_space, mx - nx, ny - my) svg.append(f' ') svg.append(f' ') diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index a6ddea3e..30243690 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -245,15 +245,14 @@ def _cython_compiled_structure(self: 'MoleculeContainer'): for j, (m, b) in enumerate(ms.items(), start): indices[j] = x = mapping[m] v = bits1[x] - o = b.order - if o == 1: + if b == 1: v |= 0x0800000000000000 - elif o == 4: - v |= 0x4000000000000000 - elif o == 2: + elif b == 2: v |= 0x1000000000000000 - elif o == 3: + elif b == 3: v |= 0x2000000000000000 + elif b == 4: + v |= 0x4000000000000000 else: v |= 0x8000000000000000 v |= 0x0400000000000000 if b.in_ring else 0x0200000000000000 @@ -488,8 +487,7 @@ def _get_automorphism_mapping(atoms: Dict[int, int], bonds: Dict[int, Dict[int, return # all atoms unique components, closures = _compile_query(atoms, bonds) - mappers = [_get_mapping(order, closures, atoms, bonds, {x for x, *_ in order}) - for order in components] + mappers = [_get_mapping(order, closures, atoms, bonds, {x for x, *_ in order}) for order in components] if len(mappers) == 1: for mapping in mappers[0]: if any(k != v for k, v in mapping.items()): diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index fc0e7d01..8569ff1f 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -452,12 +452,12 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' - order = self._bonds[n][m].order - if order == 4: + bond = self._bonds[n][m] + if bond == 4: if kwargs.get('aromatic', True): return '' return ':' - elif order == 1: # cis-trans /\ + elif bond == 1: # 
cis-trans /\ if kwargs.get('aromatic', True) and self._atoms[n].hybridization == self._atoms[m].hybridization == 4: return '-' if kwargs.get('stereo', True): @@ -469,11 +469,11 @@ def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjac if (x := ct_map.get((n, m))) is not None: return '/' if x else '\\' return '' - elif order == 2: + elif bond == 2: return '=' - elif order == 3: + elif bond == 3: return '#' - else: # order == 8 + else: # bond == 8 return '~' def __ct_map(self: 'MoleculeContainer', adjacency): diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 049671a2..a69db682 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -235,7 +235,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol continue ch = ch[0][0] ca = [n for n in r if atoms[n].atomic_number == 6 and - (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b.order == 1 for b in bonds[n].values()))] + (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue atoms[ch]._charge = 0 # reset charge for morgan recalculation @@ -268,7 +268,7 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, """ bonds = self._bonds - ab = [(n, m) for n, m, b in self.bonds() if b.order == 8] + ab = [(n, m) for n, m, b in self.bonds() if b == 8] if keep_to_terminal: skeleton = self.not_special_connectivity @@ -303,10 +303,10 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo if len(bonds[n]) > 1: raise ValenceError(f'Hydrogen atom {n} has invalid valence. 
Try to use remove_coordinate_bonds()') for m, b in bonds[n].items(): - if b.order == 1: + if b == 1: if atoms[m].atomic_number != 1: # not H-H explicit[m].append(n) - elif b.order != 8: + elif b != 8: raise ValenceError(f'Hydrogen atom {n} has invalid valence {b.order}.') to_remove = set() @@ -319,7 +319,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo explicit_sum = 0 explicit_dict = defaultdict(int) for m, bond in bonds[n].items(): - if m not in hi and bond.order != 8: + if m not in hi and bond != 8: explicit_sum += bond.order explicit_dict[(bond.order, atoms[m].atomic_number)] += 1 try: @@ -454,7 +454,7 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): hs.add(m) if m in bonds[n]: b = bonds[n][m] - if b.order == 8 or b == 8: + if b == 8 or bo == 8: keep_sssr = False b._order = bo else: # new bond diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 31f0a0da..d703083f 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -154,8 +154,8 @@ def __entries(self: 'MoleculeContainer'): (n1, b1), (n2, b2) = bonds[n].items() an1 = atoms[n1] an2 = atoms[n2] - if b1.order == b2.order == 2 and (an1.charge == -1 and an1.atomic_number == 7 or - an2.charge == -1 and an2.atomic_number == 7): + if b1 == b2 == 2 and (an1.charge == -1 and an1.atomic_number == 7 or + an2.charge == -1 and an2.atomic_number == 7): continue elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index 2118899b..9d59160d 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -221,7 +221,6 @@ def __render_bonds(self: 'MoleculeContainer', xyz): doubles = {} half_triple = triple_space / 2 for n, m, bond in self.bonds(): - order = bond.order nx, ny, nz = xyz[n] mx, my, mz = xyz[m] @@ -233,13 +232,13 @@ def 
__render_bonds(self: 'MoleculeContainer', xyz): rotation_angle = acos(nmy / length) lengths[(n, m)] = lengths[(m, n)] = (length, rotation_angle) x, y, z = nx + nmx / 2, ny + nmy / 2, nz + nmz / 2 - if order in (1, 4): + if bond in (1, 4): xml.append(f" \n \n \n" f" \n \n" f" \n \n" " \n \n \n") - elif order == 2: + elif bond == 2: if n in doubles: # normal for plane n m o norm_x, norm_y, norm_z = plane_normal(nmx, nmy, nmz, *doubles[n]) @@ -286,7 +285,7 @@ def __render_bonds(self: 'MoleculeContainer', xyz): f" \n \n" f" \n \n" " \n \n \n") - elif order == 3: + elif bond == 3: nox, noy, noz = vector_normal(nmx, nmy, nmz) # normal for plane n m o diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index a6ce7721..43847d51 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -31,10 +31,10 @@ def __init__(self, order: int): self._stereo = None def __eq__(self, other): - if isinstance(other, Bond): - return self.order == other.order - elif isinstance(other, int): + if isinstance(other, int): return self.order == other + elif isinstance(other, Bond): + return self.order == other.order return False def __repr__(self): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index d56c122d..474490ee 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -186,7 +186,7 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): bond = Bond(bond) super().add_bond(n, m, bond) - if bond.order == 8: + if bond == 8: return # any bond doesn't change anything if self._changed is None: self._changed = {n, m} @@ -208,7 +208,7 @@ def delete_atom(self, n: int, *, _skip_calculation=False): del self._atoms[n] for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] - if bond.order == 8: + if bond == 8: continue if self._changed is None: self._changed = {m} @@ -227,7 +227,7 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): Call `kekule()` and 
`thiele()` in sequence to fix marks. """ del self._bonds[n][m] - if self._bonds[m].pop(n).order != 8: + if self._bonds[m].pop(n) != 8: if self._changed is None: self._changed = {n, m} else: @@ -727,15 +727,14 @@ def calc_labels(self): for m, bond in m_bond.items(): bond._in_ring = anr and (amr := atoms_rings.get(m) or False) and not anr.isdisjoint(amr) # have common rings - order = bond.order - if order == 8: + if bond == 8: continue - elif order == 4: + elif bond == 4: hybridization = 4 elif hybridization != 4: - if order == 3: + if bond == 3: hybridization = 3 - elif order == 2: + elif bond == 2: if hybridization == 1: hybridization = 2 elif hybridization == 2: @@ -769,16 +768,15 @@ def calc_implicit(self, n: int): explicit_dict = defaultdict(int) aroma = 0 for m, bond in self._bonds[n].items(): - order = bond.order - if order == 4: # only neutral carbon aromatic rings supported + if bond == 4: # only neutral carbon aromatic rings supported if not atom.charge and not atom.is_radical and atom.atomic_number == 6: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count atom._implicit_hydrogens = None return - elif order != 8: # any bond used for complexes - explicit_sum += order - explicit_dict[(order, self._atoms[m].atomic_number)] += 1 + elif bond != 8: # any bond used for complexes + explicit_sum += bond.order + explicit_dict[(bond.order, self._atoms[m].atomic_number)] += 1 if aroma == 2: if explicit_sum == 0: # H-Ar @@ -818,12 +816,11 @@ def check_implicit(self, n: int, h: int) -> bool: explicit_dict = defaultdict(int) for m, bond in self._bonds[n].items(): - order = bond.order - if order == 4: # can't check aromatic rings + if bond == 4: # can't check aromatic rings return False - elif order != 8: # any bond used for complexes - explicit_sum += order - explicit_dict[(order, self._atoms[m].atomic_number)] += 1 + elif bond != 8: # any bond used for complexes + explicit_sum += bond.order + explicit_dict[(bond.order, 
self._atoms[m].atomic_number)] += 1 try: rules = atom.valence_rules(explicit_sum) diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 6da1ffd6..422a46a9 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -87,7 +87,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False # rare H0 case if (not keep_radicals and not ignore_aromatic_radicals and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) - and sum(b.order != 8 for b in bonds[n].values()) == 2): + and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._is_radical = True radicalized.append(n) @@ -107,7 +107,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if a.hybridization == 4: if (not keep_radicals and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) - and sum(b.order != 8 for b in bonds[n].values()) == 2): + and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._implicit_hydrogens = 0 a._is_radical = True From 36f6fbdefdd422010eacbf95ab29e2e1c5673783 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 21:25:24 +0100 Subject: [PATCH 31/68] atom matching streamlined through operator overloading. constants added for better readability. 
--- chython/algorithms/aromatics/kekule.py | 32 ++++++++++----- chython/algorithms/aromatics/thiele.py | 29 +++++++++----- chython/algorithms/smiles.py | 13 ++++-- chython/algorithms/standardize/molecule.py | 17 +++++--- chython/algorithms/standardize/resonance.py | 30 +++++++++----- chython/algorithms/standardize/salts.py | 14 ++++--- chython/algorithms/stereo.py | 42 +++++++++++--------- chython/algorithms/tautomers/heteroarenes.py | 13 ++++-- chython/algorithms/tautomers/keto_enol.py | 6 ++- chython/containers/molecule.py | 24 ++++++----- chython/files/_convert.py | 14 +++++-- chython/periodictable/base/element.py | 4 ++ 12 files changed, 155 insertions(+), 83 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 13905644..7f3cbd6c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -27,6 +27,18 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +P = 15 +S = 16 +As = 33 +Se = 34 +Te = 52 + + class Kekule: __slots__ = () @@ -170,16 +182,14 @@ def __prepare_rings(self: 'MoleculeContainer'): if any(len(rings[n]) != 2 for n in double_bonded): # double bonded never condensed raise InvalidAromaticRing('quinone valence error') for n in double_bonded: - atom = atoms[n] - if atom.atomic_number == 7: + if (atom := atoms[n]) == N: if atom.charge != 1: raise InvalidAromaticRing('quinone should be charged N atom') - elif atom.atomic_number not in (6, 15, 16, 33, 34, 52) or atom.charge: + elif atom not in (C, P, S, As, Se, Te) or atom.charge: raise InvalidAromaticRing('quinone should be neutral S, Se, Te, C, P, As atom') for n in rings: - atom = atoms[n] - if atom.atomic_number == 6: # carbon + if (atom := atoms[n]) == C: # carbon if atom.charge == 0: if atom.neighbors not in (2, 3): raise InvalidAromaticRing @@ -197,14 +207,14 @@ def __prepare_rings(self: 'MoleculeContainer'): raise InvalidAromaticRing else: raise 
InvalidAromaticRing - elif atom.atomic_number in (7, 15, 33): + elif atom in (N, P, As): if atom.charge == 0: # pyrrole or pyridine. include radical pyrrole if atom.is_radical: if atom.neighbors != 2: # only pyrrole radical raise InvalidAromaticRing double_bonded.add(n) elif atom.neighbors == 3: - if atom.atomic_number == 7: # pyrrole only possible + if atom == N: # pyrrole only possible double_bonded.add(n) else: # P(III) or P(V)H pyrroles.add(n) @@ -215,7 +225,7 @@ def __prepare_rings(self: 'MoleculeContainer'): double_bonded.add(n) elif atom.implicit_hydrogens: # too many hydrogens for aromatic rings raise InvalidAromaticRing - elif atom.neighbors != 4 or atom.atomic_number not in (15, 33): # P(V) in ring [P;a](-R1)-R2 + elif atom.neighbors != 4 or atom not in (P, As): # P(V) in ring [P;a](-R1)-R2 raise InvalidAromaticRing elif atom.charge == -1: # pyrrole only if atom.neighbors != 2 or atom.is_radical: @@ -230,7 +240,7 @@ def __prepare_rings(self: 'MoleculeContainer'): pyrroles.add(n) elif atom.neighbors != 3: # not pyridine oxyde raise InvalidAromaticRing - elif atom.atomic_number == 8: # furan + elif atom == O: # furan if atom.neighbors == 2: if atom.charge == 0: if atom.is_radical: @@ -244,7 +254,7 @@ def __prepare_rings(self: 'MoleculeContainer'): raise InvalidAromaticRing('invalid oxygen charge') else: raise InvalidAromaticRing('Triple-bonded oxygen') - elif atom.atomic_number in (16, 34, 52): # thiophene + elif atom in (S, Se, Te): # thiophene if n not in double_bonded: # not sulphoxyde nor sulphone if atom.neighbors == 2: if atom.is_radical: @@ -267,7 +277,7 @@ def __prepare_rings(self: 'MoleculeContainer'): raise InvalidAromaticRing('S, Se, Te invalid charge ring') else: raise InvalidAromaticRing('S, Se, Te hypervalent ring') - elif atom.atomic_number == 5: # boron + elif atom == B: if atom.charge == 0: if atom.neighbors == 2: if atom.is_radical: # C=1O[B]OC=1 diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py 
index c8034bcb..37ef5704 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -41,6 +41,15 @@ def _freaks(): freak_rules = Proxy(_freaks) +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +P = 15 +S = 16 +Se = 34 + class Thiele: __slots__ = () @@ -68,7 +77,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: if not 3 < lr < 8: # skip 3-membered and big rings continue # only B C N O P S with 2-3 neighbors. detects this: C1=CC=CP12=CC=CC=C2 - if any(atoms[n].atomic_number not in (6, 7, 8, 16, 5, 15) or len(nsc[n]) > 3 for n in ring): + if any(atoms[n] not in (C, N, O, S, B, P) or len(nsc[n]) > 3 for n in ring): continue sp2 = sum(atoms[n].hybridization == 2 for n in ring) if sp2 == lr: # benzene like @@ -76,7 +85,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: tetracycles.append(ring) else: if fix_tautomers and lr % 2: # find potential pyrroles - acceptors.update(n for n in ring if (a := atoms[n]).atomic_number == 7 and not a.charge) + acceptors.update(n for n in ring if (a := atoms[n]) == N and not a.charge) n, *_, m = ring rings[n].add(m) rings[m].add(n) @@ -88,26 +97,24 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: n = next(n for n in ring if atoms[n].hybridization == 1) except StopIteration: # exotic, just skip continue - a = atoms[n] - an = a.atomic_number - if (c := a.charge) == -1: - if an != 6 or lr != 5: # skip any but ferrocene + if (a := atoms[n]).charge == -1: + if a != C or lr != 5: # skip any but ferrocene continue - elif c: # skip any charged + elif a.charge: # skip any charged continue elif lr == 7: # skip electron-rich 7-membered rings - if an != 5: # not B? + if a != 5: # not B? 
continue # below lr == 5 or 6 only - elif an in (8, 16, 34): # O, S, Se + elif a in (O, S, Se): if len(bonds[n]) != 2: # like CS1(C)C=CC=C1 continue - elif an == 7: + elif a == N: if (b := len(bonds[n])) > 3: # extra check for invalid N(IV) continue elif fix_tautomers and lr == 6 and b == 2: donors.append(n) - elif an in (5, 15): # B, P + elif a in (B, P): if len(bonds[n]) > 3: continue else: # only B, [C-], N, O, P, S, Se diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 8569ff1f..fecbae0d 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -50,6 +50,13 @@ dyn_radical_str = {(True, True): '*', (True, False): '*>^', (False, True): '^>*'} +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 +S = 16 + class Smiles(ABC): __slots__ = () @@ -424,18 +431,18 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): smi[4] = 'H' elif atom.implicit_hydrogens: smi[4] = f'H{atom.implicit_hydrogens}' - elif atom.hybridization == 4 and atom.implicit_hydrogens and atom.atomic_number in (5, 7, 15): # pyrrole + elif atom.hybridization == 4 and atom.implicit_hydrogens and atom in (B, N, P): # pyrrole smi[0] = '[' smi[-1] = ']' if atom.implicit_hydrogens == 1: smi[4] = 'H' else: smi[4] = f'H{atom.implicit_hydrogens}' - elif not atom.implicit_hydrogens and atom.atomic_number in (5, 6, 15, 16) and not self.not_special_connectivity[n]: + elif not atom.implicit_hydrogens and atom in (B, C, P, S) and not self.not_special_connectivity[n]: # elemental B, C, P, S smi[0] = '[' smi[-1] = ']' - elif atom.implicit_hydrogens and atom.atomic_number == 15 and atom.hybridization != 1: + elif atom.implicit_hydrogens and atom == P and atom.hybridization != 1: smi[0] = '[' smi[-1] = ']' if atom.implicit_hydrogens == 1: diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index a69db682..5f4e5e77 100644 --- a/chython/algorithms/standardize/molecule.py +++ 
b/chython/algorithms/standardize/molecule.py @@ -25,13 +25,18 @@ from ._metal_organics import rules as metal_rules from ...containers.bonds import Bond from ...exceptions import ValenceError, ImplementationError -from ...periodictable import H +from ...periodictable import H as _H if TYPE_CHECKING: from chython import MoleculeContainer +# atomic number constants +H = 5 +C = 6 + + class Standardize: __slots__ = () @@ -234,7 +239,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol if len(ch) != 1 or ch[0][1] != -1: continue ch = ch[0][0] - ca = [n for n in r if atoms[n].atomic_number == 6 and + ca = [n for n in r if atoms[n] == C and (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue @@ -272,7 +277,7 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, if keep_to_terminal: skeleton = self.not_special_connectivity - hs = {n for n, a in self._atoms.items() if a.atomic_number == 1 and not skeleton[n]} + hs = {n for n, a in self._atoms.items() if a == H and not skeleton[n]} ab = [(n, m) for n, m in ab if n not in hs and m not in hs] for n, m in ab: @@ -299,12 +304,12 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo explicit = defaultdict(list) for n, atom in atoms.items(): - if atom.atomic_number == 1 and (atom.isotope is None or atom.isotope == 1): + if atom == H and (atom.isotope is None or atom.isotope == 1): if len(bonds[n]) > 1: raise ValenceError(f'Hydrogen atom {n} has invalid valence. 
Try to use remove_coordinate_bonds()') for m, b in bonds[n].items(): if b == 1: - if atoms[m].atomic_number != 1: # not H-H + if atoms[m] != H: # not H-H explicit[m].append(n) elif b != 8: raise ValenceError(f'Hydrogen atom {n} has invalid valence {b.order}.') @@ -374,7 +379,7 @@ def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_ma bonds = self._bonds m = start_map if start_map is not None else max(atoms) + 1 for n in to_add: - atoms[m] = H(implicit_hydrogens=0) + atoms[m] = _H(implicit_hydrogens=0) bonds[n][m] = b = Bond(1) bonds[m] = {n: b} atoms[n]._implicit_hydrogens = 0 diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index d703083f..2283540f 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -24,6 +24,19 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +Si = 14 +P = 15 +S = 16 +As = 33 +Se = 34 +Te = 52 + + class Resonance: __slots__ = () @@ -132,36 +145,35 @@ def __entries(self: 'MoleculeContainer'): nitrogen_ani = set() sulfur_cat = set() for n, a in atoms.items(): - if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52}: + if a not in (B, C, N, O, Si, P, S, As, Se, Te): # filter non-organic set, halogens and aromatics continue elif a.is_radical: rads.add(n) elif a.charge == -1: - if (lb := len(bonds[n])) == 4 and a.atomic_number == 5: # skip boron + if (lb := len(bonds[n])) == 4 and a == B: # skip boron continue - elif lb == 6 and a.atomic_number == 15: # skip [P-]X6 + elif lb == 6 and a == P: # skip [P-]X6 continue if n in errors: # only valid anions accepted continue entries.add(n) elif a.charge == 1: lb = len(bonds[n]) - if a.atomic_number == 7: + if a == N: if lb == 4: # skip ammonia continue elif lb == 2 and a.hybridization == 3: # skip Azide (n1, b1), (n2, b2) = bonds[n].items() an1 = atoms[n1] an2 = atoms[n2] - if b1 == b2 == 2 and (an1.charge == -1 and 
an1.atomic_number == 7 or - an2.charge == -1 and an2.atomic_number == 7): + if b1 == b2 == 2 and (an1.charge == -1 and an1 == N or an2.charge == -1 and an2 == N): continue elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) - elif a.atomic_number == 15 and lb == 4: # skip [P+]R4 + elif a == P and lb == 4: # skip [P+]R4 continue - elif a.atomic_number == 16: + elif a == S: if lb == 2 and a.hybridization == 2: # ad-hoc for X-[S+]=X sulfur_cat.add(n) elif lb == 3 and a.hybridization == 1: # ad-hoc for X-[S+](-X)-X @@ -171,7 +183,7 @@ def __entries(self: 'MoleculeContainer'): if exits or entries: # try to move cation to nitrogen. saturation fixup. for n, a in self._atoms.items(): - if a.atomic_number == 7 and not a.charge: + if a == N and not a.charge: if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) nitrogen_cat.add(n) diff --git a/chython/algorithms/standardize/salts.py b/chython/algorithms/standardize/salts.py index d281b593..48cec856 100644 --- a/chython/algorithms/standardize/salts.py +++ b/chython/algorithms/standardize/salts.py @@ -18,12 +18,18 @@ # from typing import TYPE_CHECKING, List, Tuple, Union from ._salts import acids, rules +from ...periodictable import GroupI, GroupII if TYPE_CHECKING: from chython import MoleculeContainer +# atomic number constants +H = 1 +N = 7 + + class Salts: __slots__ = () @@ -38,7 +44,7 @@ def remove_metals(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Li metals = [] for n, a in atoms.items(): - if a.atomic_number in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: + if not bonds[n] and (a == N or isinstance(a, (GroupI, GroupII)) and a != H): metals.append(n) if 0 < len(metals) < len(self): @@ -84,16 +90,14 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool, List[Tuple[int, int]]]: """ - Split connected 
S-metal/lanthanides/actinides salts to cation/anion pairs. + Split connected S-metal salts to cation/anion pairs. :param logging: return deleted bonds list. """ atoms = self._atoms bonds = self._bonds - metals = [n for n, a in atoms.items() if a.atomic_number in - {3, 4, 11, 12, 19, 20, 37, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 87, 88, - 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102}] + metals = [n for n, a in atoms.items() if isinstance(a, (GroupI, GroupII)) and a != H] if metals: acceptors = set() log = [] diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index 7421d3f5..e243d6a2 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -33,6 +33,10 @@ from chython import MoleculeContainer +# atomic number constants +H = 1 +C = 6 + # 1 2 # \ | # \| @@ -165,7 +169,7 @@ def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: """ tetra = [] for n, atom in self._atoms.items(): - if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: + if atom == C and not atom.charge and not atom.is_radical: env = self._bonds[n] if all(b == 1 for b in env.values()): if sum(int(b) for b in env.values()) > 4: @@ -227,7 +231,7 @@ def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple for n in self.tetrahedrons: if any(not atoms[x].is_forming_single_bonds for x in bonds[n]): continue # skip metal-carbon complexes - env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) + env = tuple(x for x in bonds[n] if atoms[x] != H) if len(env) in (3, 4): tetrahedrons[n] = env return tetrahedrons @@ -255,8 +259,8 @@ def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tu if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 for m, b in nl.items() if m != m1): continue # skip X=C=C structures and metal-carbon complexes - nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b != 8] - mn = [x for x, 
b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b != 8] + nn = [x for x, b in nf.items() if x != n1 and atoms[x] != H and b != 8] + mn = [x for x, b in nl.items() if x != m1 and atoms[x] != H and b != 8] if nn and mn: sn = nn[1] if len(nn) == 2 else None sm = mn[1] if len(mn) == 2 else None @@ -405,7 +409,7 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac t1, t2 = self._stereo_allenes_terminals[c] order = self.stereogenic_allenes[c] - if atoms[m].atomic_number == 1: + if atoms[m] == H: if t1 == n: m1 = order[1] else: @@ -436,7 +440,7 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac elif n in self.chiral_tetrahedrons: th = self.stereogenic_tetrahedrons[n] am = atoms[m] - if am.atomic_number == 1: + if am == H: order = [] for x in th: ax = atoms[x] @@ -704,7 +708,7 @@ def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): if len(env) == 4: # hydrogen atom passed to env # hydrogen always last in order try: - order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme + order = (*order, next(x for x in env if self._atoms[x] == H)) # see translate scheme except StopIteration: raise KeyError elif len(env) != 3: # pyramid or tetrahedron expected @@ -744,7 +748,7 @@ def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError @@ -752,23 +756,23 @@ def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn] == H: t0 = 2 
if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn] == H: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError @@ -798,7 +802,7 @@ def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError @@ -806,23 +810,23 @@ def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn] == H: t0 = 2 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn] == H: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 4115d6a3..99a154f4 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -27,6 +27,13 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 
+ + class HeteroArenes: __slots__ = () @@ -48,7 +55,7 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): for n, ms in rings.items(): a = atoms[n] if len(ms) == 2: - if a.atomic_number in (5, 7, 15): + if a in (B, N, P): if not a.charge and not a.is_radical: # only neutral B, N, P if a.implicit_hydrogens: # pyrrole @@ -57,9 +64,9 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): acceptors.add(n) else: single_bonded.add(n) - elif a.charge == -1 and a.atomic_number == 6: # ferrocene + elif a.charge == -1 and a == C: # ferrocene single_bonded.add(n) - elif len(ms) == 3 and a.atomic_number in (5, 7, 15) and not a.charge and not a.is_radical: + elif len(ms) == 3 and a in (B, N, P) and not a.charge and not a.is_radical: single_bonded.add(n) if not donors or not acceptors: return diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index ddcd14d7..ba80f63b 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -27,6 +27,10 @@ from chython import MoleculeContainer +# atomic number constants +C = 6 + + class KetoEnol: __slots__ = () @@ -121,7 +125,7 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): cp = path.copy() cp.append((current, n, 2)) # single to double in enol end yield cp, False - elif b == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route + elif b == bond and (a := atoms[n]) == C: # classic keto-enol route if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 474490ee..e6263811 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -40,7 +40,12 @@ from ..algorithms.tautomers import Tautomers from ..algorithms.x3dom import X3domMolecule from ..exceptions import ValenceError -from ..periodictable import DynamicElement, Element, QueryElement, H +from 
..periodictable import DynamicElement, Element, QueryElement, H as _H + + +# atomic number constants +H = 5 +C = 6 class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], Morgan, Rings, MoleculeIsomorphism, @@ -134,7 +139,7 @@ def is_radical(self) -> bool: @cached_property def molecular_mass(self) -> float: - h = H().atomic_mass + h = _H().atomic_mass return sum(a.atomic_mass + a.implicit_hydrogens * h for a in self._atoms.values()) @cached_property @@ -291,7 +296,7 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul if as_query: sub = object.__new__(QueryContainer) - lost = {n for n, a in self._atoms.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure + lost = {n for n, a in self._atoms.items() if a != H} - set(atoms) # atoms not in substructure # atoms with fully present neighbors not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} @@ -741,10 +746,9 @@ def calc_labels(self): hybridization = 3 neighbors += 1 - an = atoms[m].atomic_number - if an == 1: + if (a := atoms[m]) == H: explicit_hydrogens += 1 - elif an != 6: + elif a != C: heteroatoms += 1 atom = atoms[n] atom._neighbors = neighbors @@ -759,8 +763,7 @@ def calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules """ - atom = self._atoms[n] - if atom.atomic_number == 1: # hydrogen nether has implicit H + if (atom := self._atoms[n]) == H: # hydrogen nether has implicit H atom._implicit_hydrogens = 0 return @@ -769,7 +772,7 @@ def calc_implicit(self, n: int): aroma = 0 for m, bond in self._bonds[n].items(): if bond == 4: # only neutral carbon aromatic rings supported - if not atom.charge and not atom.is_radical and atom.atomic_number == 6: + if not atom.charge and not atom.is_radical and atom == C: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count atom._implicit_hydrogens = None @@ -808,8 +811,7 @@ def calc_implicit(self, n: int): atom._implicit_hydrogens = None # rule not found def 
check_implicit(self, n: int, h: int) -> bool: - atom = self._atoms[n] - if atom.atomic_number == 1: # hydrogen nether has implicit H + if (atom := self._atoms[n]) == H: # hydrogen nether has implicit H return h == 0 explicit_sum = 0 diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 422a46a9..c23ee616 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -22,6 +22,13 @@ from ..periodictable import Element +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 + + def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False, keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, ignore_carbon_radicals=False, _cls=MoleculeContainer): @@ -86,7 +93,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False a._implicit_hydrogens = h # rare H0 case if (not keep_radicals and not ignore_aromatic_radicals - and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and not h and not a.charge and not a.is_radical and a in (B, C, N, P) and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._is_radical = True @@ -106,7 +113,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif h != a.implicit_hydrogens: # H count mismatch. 
if a.hybridization == 4: if (not keep_radicals - and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and not h and not a.charge and not a.is_radical and a in (B, C, N, P) and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._implicit_hydrogens = 0 @@ -139,8 +146,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if ignore_carbon_radicals: for n in radicalized: - a = atoms[n] - if a.atomic_number == 6: + if (a := atoms[n]) == C: a._is_radical = False a._implicit_hydrogens += 1 data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 7818af9a..1185d661 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -334,6 +334,10 @@ def __eq__(self, other): """ compare attached to molecules elements """ + if isinstance(other, int): + return self.atomic_number == other + elif isinstance(other, str): + return self.atomic_symbol == other return isinstance(other, Element) and self.atomic_number == other.atomic_number and \ self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical From 80d21299d0582be126b34e923336ed42a3031fac Mon Sep 17 00:00:00 2001 From: stsouko Date: Thu, 14 Nov 2024 09:09:31 +0100 Subject: [PATCH 32/68] saved --- chython/algorithms/aromatics/_rules.py | 18 ++++++- chython/algorithms/aromatics/thiele.py | 17 +------ chython/algorithms/isomorphism.py | 67 ++++++++++++++++++++++++-- chython/files/_mdl/stereo.py | 2 +- chython/files/libinchi/wrapper.py | 8 +-- 5 files changed, 84 insertions(+), 28 deletions(-) diff --git a/chython/algorithms/aromatics/_rules.py b/chython/algorithms/aromatics/_rules.py index 02b061aa..49b69cd6 100644 --- a/chython/algorithms/aromatics/_rules.py +++ b/chython/algorithms/aromatics/_rules.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# 
Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -104,7 +104,21 @@ def _rules(): return rules +def _freaks(): + from ... import smarts + + rules = [] + + q = smarts('[N,O,S;D2;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') + rules.append(q) + + q = smarts('[N;D3;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') + rules.append(q) + return rules + + rules = Proxy(_rules) +freak_rules = Proxy(_freaks) -__all__ = ['rules'] +__all__ = ['rules', 'freak_rules'] diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 37ef5704..c6682247 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -17,8 +17,8 @@ # along with this program; if not, see . # from collections import defaultdict -from lazy_object_proxy import Proxy from typing import TYPE_CHECKING +from ._rules import freak_rules from ..rings import _sssr, _connected_components @@ -26,21 +26,6 @@ from chython import MoleculeContainer -def _freaks(): - from ... import smarts - - rules = [] - - q = smarts('[N,O,S;D2;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') - rules.append(q) - - q = smarts('[N;D3;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') - rules.append(q) - return rules - - -freak_rules = Proxy(_freaks) - # atomic number constants B = 5 C = 6 diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 30243690..2a64bdf8 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -156,9 +156,9 @@ def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: boo :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. 
""" - if isinstance(other, MoleculeIsomorphism): - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) - raise TypeError('MoleculeContainer expected') + if not isinstance(other, MoleculeIsomorphism): + raise TypeError('MoleculeContainer expected') + return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) @cached_property def _cython_compiled_structure(self: 'MoleculeContainer'): @@ -299,6 +299,67 @@ def get_mapping(query, scope): return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, components=components, get_mapping=get_mapping) + atoms_stereo = self._atoms_stereo + allenes_stereo = self._allenes_stereo + cis_trans_stereo = self._cis_trans_stereo + + other_atoms_stereo = other._atoms_stereo + other_allenes_stereo = other._allenes_stereo + other_cis_trans_stereo = other._cis_trans_stereo + other_translate_tetrahedron_sign = other._translate_tetrahedron_sign + other_translate_allene_sign = other._translate_allene_sign + other_translate_cis_trans_sign = other._translate_cis_trans_sign + + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans + allenes = self.stereogenic_allenes + + oatoms = other._atoms + + for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, + searching_scope=searching_scope): + for n, a in self._atoms.items(): + if a.stereo is None: + continue + m = mapping[n] + oa = oatoms[m] + if oa.stereo is None: # stereo in query should match only stereo atom + break + other._translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) + for n, s in atoms_stereo.items(): + m = mapping[n] + if m not in other_atoms_stereo: # self stereo atom not stereo in other + break + # translate stereo mark in other in order of self tetrahedron + if other_translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) != s: + break + else: + for n, s 
in allenes_stereo.items(): + m = mapping[n] + if m not in other_allenes_stereo: # self stereo allene not stereo in other + break + # translate stereo mark in other in order of self allene + nn, nm, *_ = allenes[n] + if other_translate_allene_sign(m, mapping[nn], mapping[nm]) != s: + break + else: + for nm, s in cis_trans_stereo.items(): + n, m = nm + on, om = mapping[n], mapping[m] + if (on, om) not in other_cis_trans_stereo: + if (om, on) not in other_cis_trans_stereo: + break # self stereo cis_trans not stereo in other + else: + nn, nm, *_ = cis_trans[nm] + if other_translate_cis_trans_sign(om, on, mapping[nm], mapping[nn]) != s: + break + else: + nn, nm, *_ = cis_trans[nm] + if other_translate_cis_trans_sign(on, om, mapping[nn], mapping[nm]) != s: + break + else: + yield mapping + @cached_property def _cython_compiled_query(self): # long I: diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py index ce9a651c..761b6503 100644 --- a/chython/files/_mdl/stereo.py +++ b/chython/files/_mdl/stereo.py @@ -30,7 +30,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans= log = [] if calc_cis_trans: - molecule.calculate_cis_trans_from_2d() + molecule.calculate_cis_trans_from_2d(clean_cache=False) stereo = [(mapping[n], mapping[m], s) for n, m, s in data['stereo']] while stereo: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index aaefb948..3941428e 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -53,7 +53,7 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, + 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, 'p': 
atom.implicit_protium, 'd': atom.implicit_deuterium, 't': atom.implicit_tritium}) for k in range(atom.num_bonds): @@ -92,16 +92,12 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo def postprocess_molecule(molecule, data, *, ignore_stereo=False): atoms = molecule._atoms bonds = molecule._bonds - charges = molecule._charges - radicals = molecule._radicals - hydrogens = molecule._hydrogens - plane = molecule._plane # set hydrogen atoms. INCHI designed for hydrogens handling. hope correctly. free = count(len(atoms) + 1) for n, atom in enumerate(data['atoms'], 1): if atom['element'] != 'H': - hydrogens[n] = atom['hydrogens'] + atoms[n]._implicit_hydrogens = atom['hydrogens'] # in chython hydrogens never have implicit H. elif atom['hydrogens']: # >[xH]-H case m = next(free) From bf8132739934a2586cfc4eed74a458de0b0a2881 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 20 Nov 2024 18:04:40 +0100 Subject: [PATCH 33/68] parsers refactored --- chython/files/MRVrw.py | 45 ++++++++----------- chython/files/PDBrw.py | 9 +++- chython/files/_convert.py | 40 +++++++++++------ chython/files/_mapping.py | 6 +++ chython/files/_mdl/emol.py | 10 ++--- chython/files/_mdl/erxn.py | 4 +- chython/files/_mdl/mol.py | 10 ++--- chython/files/_mdl/rxn.py | 4 +- chython/files/_mdl/stereo.py | 13 +++--- chython/files/_mdl/write.py | 36 ++++++--------- chython/files/daylight/parser.py | 3 +- chython/files/daylight/smiles.py | 9 ++-- chython/files/libinchi/wrapper.py | 73 ++++++++++++++++--------------- 13 files changed, 136 insertions(+), 126 deletions(-) diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index 0a589410..3c808746 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -140,12 +140,12 @@ def read_structure(self, *, current: bool = True): mol = create_molecule(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _cls=self.molecule_cls) if not self.__ignore_stereo: postprocess_molecule(mol, tmp, 
calc_cis_trans=self.__calc_cis_trans) - mol.meta.update(meta) + if meta: + mol.meta.update(meta) return mol elif 'reaction' in data and isinstance(data['reaction'], dict): data = data['reaction'] - tmp = {'reactants': [], 'products': [], 'reagents': [], - 'meta': None, 'log': log, 'title': data.get('@title')} + tmp = {'reactants': [], 'products': [], 'reagents': [], 'log': log, 'title': data.get('@title')} n = 0 for tag, group in (('reactantList', 'reactants'), ('productList', 'products'), ('agentList', 'reagents')): @@ -174,7 +174,8 @@ def read_structure(self, *, current: bool = True): if not self.__ignore_stereo: for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) - rxn.meta.update(meta) + if meta: + rxn.meta.update(meta) return rxn else: raise ValueError('reaction or molecule expected') @@ -263,7 +264,6 @@ def _read_block(self, *, current: bool = True) -> dict: def parse_molecule(data): atoms, bonds, stereo = [], [], [] log = [] - hydrogens = {} atom_map = {} if 'atom' in data['atomArray']: da = data['atomArray']['atom'] @@ -275,20 +275,20 @@ def parse_molecule(data): 'isotope': int(atom['@isotope']) if '@isotope' in atom else None, 'charge': int(atom.get('@formalCharge', 0)), 'is_radical': '@radical' in atom, - 'mapping': int(atom.get('@mrvMap', 0))}) + 'parsed_mapping': int(atom.get('@mrvMap', 0))}) if '@z3' in atom: atoms[-1].update(x=float(atom['@x3']), y=float(atom['@y3']), z=float(atom['@z3'])) else: - atoms[-1].update(x=float(atom['@x2']) / 2, y=float(atom['@y2']) / 2, z=0.) 
+ atoms[-1].update(x=float(atom['@x2']) / 2, y=float(atom['@y2']) / 2) if '@mrvQueryProps' in atom: raise ValueError('queries unsupported') if '@hydrogenCount' in atom: - hydrogens[n] = int(atom['@hydrogenCount']) + atoms[-1]['implicit_hydrogens'] = int(atom['@hydrogenCount']) else: atom = data['atomArray'] for n, (_id, e) in enumerate(zip(atom['@atomID'].split(), atom['@elementType'].split())): atom_map[_id] = n - atoms.append({'element': e, 'charge': 0, 'mapping': 0, 'isotope': None, 'is_radical': False}) + atoms.append({'element': e}) if '@z3' in atom: for a, x, y, z in zip(atoms, atom['@x3'].split(), atom['@y3'].split(), atom['@z3'].split()): a['x'] = float(x) @@ -298,7 +298,6 @@ def parse_molecule(data): for a, x, y in zip(atoms, atom['@x2'].split(), atom['@y2'].split()): a['x'] = float(x) / 2 a['y'] = float(y) / 2 - a['z'] = 0. if '@isotope' in atom: for a, x in zip(atoms, atom['@isotope'].split()): if x != '0': @@ -310,7 +309,7 @@ def parse_molecule(data): if '@mrvMap' in atom: for a, x in zip(atoms, atom['@mrvMap'].split()): if x != '0': - a['mapping'] = int(x) + a['parsed_mapping'] = int(x) if '@radical' in atom: for a, x in zip(atoms, atom['@radical'].split()): if x != '0': @@ -340,8 +339,8 @@ def parse_molecule(data): log.append('incorrect bondStereo tag') bonds.append((atom_map[a1], atom_map[a2], order)) - return {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': None, 'title': data.get('@title'), 'log': log, 'atom_map': atom_map} + return {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, + 'title': data.get('@title'), 'log': log, 'atom_map': atom_map} def parse_sgroup(data, molecule): @@ -486,30 +485,24 @@ def __write(self, data): file.write('\n') def __write_molecule(self, g): - gp = g._plane - gc = g._charges - gr = g._radicals bg = g._bonds - hg = g._hydrogens - hb = g.hybridization mapping = self.__mapping file = self.__file file.write('') - for n, atom in g._atoms.items(): - x, y = gp[n] - ih = hg[n] + for n, 
atom in g.atoms(): + x, y = atom.x, atom.y file.write(f'') file.write('') diff --git a/chython/files/PDBrw.py b/chython/files/PDBrw.py index a761e3cb..01ad869f 100644 --- a/chython/files/PDBrw.py +++ b/chython/files/PDBrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -178,6 +178,8 @@ def read_structure(self, *, current: bool = True) -> MoleculeContainer: atom_charge=charges, _cls=self.molecule_cls) mol.meta['RESIDUE'] = dict(enumerate(res, 1)) + if log: + mol.meta['chython_parsing_log'] = log if self.__parse_as_single: self.__parsed_first = mol.copy() return mol @@ -191,6 +193,11 @@ def read_structure(self, *, current: bool = True) -> MoleculeContainer: c[n] = (x, y, z) mol = self.__parsed_first.copy() mol._conformers[0] = c + if log: + if 'chython_parsing_log' in mol.meta: + mol.meta['chython_parsing_log'] = mol.meta['chython_parsing_log'] + log + else: + mol.meta['chython_parsing_log'] = log return mol def close(self, force: bool = False): diff --git a/chython/files/_convert.py b/chython/files/_convert.py index c23ee616..e25a93cd 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -33,12 +33,18 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, ignore_carbon_radicals=False, _cls=MoleculeContainer): g = _cls() + g._name = data.get('title') atoms = g._atoms bonds = g._bonds mapping = data['mapping'] - for n, atom in enumerate(data['atoms']): - n = mapping[n] + + if any(a.get('z') for a in data['atoms']): + # store conformer + g._conformers = [{n: (a['x'], a['y'], a['z']) for n, a in zip(mapping, data['atoms'])}] + + for n, atom in zip(mapping, data['atoms']): e = Element.from_symbol(atom.pop('element')) + atom.pop('z', None) # clean up MDL try: atoms[n] 
= e(**atom) except (ValueError, TypeError): @@ -60,15 +66,11 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False g.calc_labels() # set all labels except rings - if any(a.get('z') for a in data['atoms']): - # store conformer - g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] - - if data['log']: # store log to the meta - if data['meta'] is None: + if data.get('log'): # store log to the meta + if data.get('meta') is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] - g._meta = data['meta'] + g._meta = data.get('meta') or None if skip_calc_implicit: # don't calc Hs. e.g. INCHI return g @@ -107,6 +109,8 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif ignore: # radical state also has errors. a._is_radical = False # reset radical state implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') @@ -121,6 +125,8 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False radicalized.append(n) elif ignore: implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') @@ -135,11 +141,15 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif ignore: a._is_radical = False # reset radical state implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') elif ignore: # just ignore it 
implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') @@ -149,10 +159,12 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if (a := atoms[n]) == C: a._is_radical = False a._implicit_hydrogens += 1 + if data.get('log') is None: + data['log'] = [] data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') elif radicalized: g.meta['chython_radicalized_atoms'] = radicalized - if data['log'] and 'chython_parsing_log' not in g.meta: + if data.get('log') and 'chython_parsing_log' not in g.meta: g.meta['chython_parsing_log'] = data['log'] if implicit_mismatch: g.meta['chython_implicit_mismatch'] = implicit_mismatch @@ -177,17 +189,19 @@ def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_i except ValueError as e: if not ignore: raise + if data.get('log') is None: + data['log'] = [] data['log'].append(f'ignored {gr} molecule {n} with {e}') tdl.append(n) if tdl: # ad-hoc for later postprocessing for n in reversed(tdl): del pms[n] - if data['log']: # store log to the meta - if data['meta'] is None: + if data.get('log'): # store log to the meta + if data.get('meta') is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] - return _r_cls(rc, pr, rg, meta=data['meta'], name=data['title']) + return _r_cls(rc, pr, rg, meta=data.get('meta') or None, name=data.get('title')) __all__ = ['create_molecule'] diff --git a/chython/files/_mapping.py b/chython/files/_mapping.py index 331eaa3e..c142676e 100644 --- a/chython/files/_mapping.py +++ b/chython/files/_mapping.py @@ -34,6 +34,8 @@ def postprocess_parsed_molecule(data, *, remap=False, ignore=True): if not ignore: raise MappingError('mapping in molecules should be unique') remapped.append(next(length)) + if data.get('log') is None: + 
data['log'] = [] data['log'].append(f'mapping in molecule changed from {m} to {remapped[n]}') else: remapped.append(m) @@ -72,6 +74,8 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): raise MappingError('mapping in reagents or products or reactants should be unique') # force remap non unique atoms in molecules. _remap.append(next(length)) + if data.get('log') is None: + data['log'] = [] data['log'].append(f'mapping in {i} changed from {m} to {_remap[-1]}') else: _remap.append(m) @@ -83,6 +87,8 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): e = f'reagents has map intersection with reactants or products: {tmp}' if not ignore: raise MappingError(e) + if data.get('log') is None: + data['log'] = [] data['log'].append(e) maps['reagents'] = [x if x not in tmp else next(length) for x in maps['reagents']] diff --git a/chython/files/_mdl/emol.py b/chython/files/_mdl/emol.py index 9e6b4437..03b15a6a 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/_mdl/emol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -36,7 +36,6 @@ def parse_mol_v3000(data, *, _header=True): atoms = [] bonds = [] stereo = [] - hydrogens = {} meta = {} atom_map = {} star_points = [] @@ -95,7 +94,7 @@ def parse_mol_v3000(data, *, _header=True): atom_map[n] = len(atoms) atoms.append({'element': a, 'isotope': i, 'charge': c, 'is_radical': r, - 'x': float(x), 'y': float(y), 'z': float(z), 'mapping': int(m)}) + 'x': float(x), 'y': float(y), 'z': float(z), 'parsed_mapping': int(m)}) for line in data[2 + atom_count: 2 + atom_count + bonds_count]: _, t, a1, a2, *kvs = split(line) @@ -172,14 +171,13 @@ def parse_mol_v3000(data, *, _header=True): d = v.strip('"') if a and f and d: if f == 'MRV_IMPLICIT_H': - hydrogens[a[0]] = int(d[6:]) + atoms[a[0]]['implicit_hydrogens'] = int(d[6:]) else: log.append(f'ignored SGROUP DAT {i}: {a}\t{f}\t{d}') elif _type.startswith('SRU'): raise ValueError('Polymers not supported') - return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': meta or None, 'log': log} + return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'meta': meta, 'log': log} def split(line): # todo optimize diff --git a/chython/files/_mdl/erxn.py b/chython/files/_mdl/erxn.py index 25354f9b..6b707b3e 100644 --- a/chython/files/_mdl/erxn.py +++ b/chython/files/_mdl/erxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -61,7 +61,7 @@ def parse_rxn_v3000(data, *, ignore=True): reagents_count -= 1 return {'reactants': molecules[:reactants_count], 'products': molecules[reactants_count:products_count], - 'reagents': molecules[products_count:], 'title': title, 'meta': None, 'log': log} + 'reagents': molecules[products_count:], 'title': title, 'log': log} __all__ = ['parse_rxn_v3000'] diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index 3e15cbf9..db819f2b 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -36,7 +36,6 @@ def parse_mol_v2000(data): atoms = [] bonds = [] stereo = [] - hydrogens = {} dat = {} for line in data[4: 4 + atoms_count]: @@ -62,8 +61,8 @@ def parse_mol_v2000(data): isotope = None mapping = line[60:63] - atoms.append({'element': element, 'charge': charge, 'isotope': isotope, 'is_radical': False, - 'mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), + atoms.append({'element': element, 'charge': charge, 'isotope': isotope, + 'parsed_mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), 'z': float(line[20:30]), 'delta_isotope': delta_isotope}) for line in data[4 + atoms_count: 4 + atoms_count + bonds_count]: @@ -133,14 +132,13 @@ def parse_mol_v2000(data): value = x['value'] if len(_atoms) != 1 or _atoms[0] == -1 or not value: raise InvalidV2000(f'MRV_IMPLICIT_H spec invalid {x}') - hydrogens[_atoms[0]] = int(value[6:]) + atoms[_atoms[0]]['implicit_hydrogens'] = int(value[6:]) else: log.append(f'ignored data: {x}') except KeyError: raise InvalidV2000(f'Invalid SGROUP {x}') - return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': None, 'log': log} + return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'log': log} __all__ = ['parse_mol_v2000'] diff --git a/chython/files/_mdl/rxn.py b/chython/files/_mdl/rxn.py 
index d81ee459..50df40e4 100644 --- a/chython/files/_mdl/rxn.py +++ b/chython/files/_mdl/rxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -61,7 +61,7 @@ def parse_rxn_v2000(data, *, ignore=True): reagents_count -= 1 return {'reactants': molecules[:reactants_count], 'products': molecules[reactants_count:products_count], - 'reagents': molecules[products_count:], 'title': title, 'meta': None, 'log': log} + 'reagents': molecules[products_count:], 'title': title, 'log': log} __all__ = ['parse_rxn_v2000'] diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py index 761b6503..212cb77d 100644 --- a/chython/files/_mdl/stereo.py +++ b/chython/files/_mdl/stereo.py @@ -23,11 +23,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans= if ignore_stereo: return mapping = data['mapping'] - - if 'chython_parsing_log' in molecule.meta: - log = molecule.meta['chython_parsing_log'] - else: - log = [] + log = [] if calc_cis_trans: molecule.calculate_cis_trans_from_2d(clean_cache=False) @@ -57,8 +53,11 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans= continue break - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log + if log: + if 'chython_parsing_log' not in molecule.meta: + molecule.meta['chython_parsing_log'] = log + else: + molecule.meta['chython_parsing_log'].extend(log) __all__ = ['postprocess_molecule'] diff --git a/chython/files/_mdl/write.py b/chython/files/_mdl/write.py index c6bfc1bd..3319c60d 100644 --- a/chython/files/_mdl/write.py +++ b/chython/files/_mdl/write.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -77,10 +77,7 @@ def _write_molecule(self, g, write3d=None): else: z = 0 - gc = g._charges - gr = g._radicals - gp = g._plane - gb = g._bonds + bonds = g._bonds file = self._file file.write(f'M V30 BEGIN CTAB\nM V30 COUNTS {g.atoms_count} {g.bonds_count} 0 0 0\nM V30 BEGIN ATOM\n') @@ -90,11 +87,10 @@ def _write_molecule(self, g, write3d=None): x, y, z = xyz[m] z = f'{z:.4f}' else: - x, y = gp[m] + x, y = a.x, a.y - c = gc[m] - c = f' CHG={c}' if c else '' - r = ' RAD=2' if gr[m] else '' + c = f' CHG={a.charge}' if a.charge else '' + r = ' RAD=2' if a.is_radical else '' i = f' MASS={a.isotope}' if a.isotope else '' if not self._mapping: @@ -107,7 +103,7 @@ def _write_molecule(self, g, write3d=None): wedge = defaultdict(set) i = 0 # trick for empty wedge_map for i, (n, m, s) in enumerate(g._wedge_map, start=1): - file.write(f'M V30 {i} {gb[n][m].order} {mapping[n]} {mapping[m]} CFG={s == 1 and "1" or "3"}\n') + file.write(f'M V30 {i} {bonds[n][m].order} {mapping[n]} {mapping[m]} CFG={s == 1 and "1" or "3"}\n') wedge[n].add(m) wedge[m].add(n) @@ -130,10 +126,7 @@ def _write_molecule(self, g, write3d=None): else: z = 0. 
- gc = g._charges - gr = g._radicals - gp = g._plane - gb = g._bonds + bonds = g._bonds file = self._file file.write(f'{g.name}\n\n\n{g.atoms_count:3d}{g.bonds_count:3d} 0 0 0 0 999 V2000\n') @@ -142,9 +135,9 @@ def _write_molecule(self, g, write3d=None): if write3d is not None: x, y, z = xyz[m] else: - x, y = gp[m] + x, y = a.x, a.y - c = charge_map[gc[m]] + c = charge_map[a.charge] if not self._mapping: m = 0 file.write(f'{x:10.4f}{y:10.4f}{z:10.4f} {a.atomic_symbol:3s} 0{c} 0 0 0 0 0 0 0{m:3d} 0 0\n') @@ -152,21 +145,20 @@ def _write_molecule(self, g, write3d=None): atoms = {m: n for n, m in enumerate(g._atoms, start=1)} wedge = defaultdict(set) for n, m, s in g._wedge_map: - file.write(f'{atoms[n]:3d}{atoms[m]:3d} {gb[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') + file.write(f'{atoms[n]:3d}{atoms[m]:3d} {bonds[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') wedge[n].add(m) wedge[m].add(n) for n, m, b in g.bonds(): if m not in wedge[n]: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {b.order} 0 0 0 0\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, a in enumerate(g._atoms.values(), start=1): if a.isotope: file.write(f'M ISO 1 {n:3d} {a.isotope:3d}\n') - if gr[m]: + if a.is_radical: file.write(f'M RAD 1 {n:3d} 2\n') # invalid for carbenes - c = gc[m] - if c in (-4, 4): - file.write(f'M CHG 1 {n:3d} {c:3d}\n') + if a.charge in (-4, 4): + file.write(f'M CHG 1 {n:3d} {a.charge:3d}\n') file.write('M END\n') diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index 3cab6272..f685a359 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -147,8 +147,7 @@ def parser(tokens, strong_cycle): elif previous: raise IncorrectSmiles('bond on the end') - return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log, - 'title': None, 'meta': None} + return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log} __all__ = ['parser'] 
diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 410df35a..442195f8 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -78,7 +78,7 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: contract = None if '>' in smi: - record = {'reactants': [], 'reagents': [], 'products': [], 'log': log, 'meta': None, 'title': None} + record = {'reactants': [], 'reagents': [], 'products': [], 'log': log} try: reactants, reagents, products = smi.split('>') except ValueError as e: @@ -237,8 +237,11 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): continue break - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log + if log: + if 'chython_parsing_log' not in molecule.meta: + molecule.meta['chython_parsing_log'] = log + else: + molecule.meta['chython_parsing_log'].extend(log) __all__ = ['smiles'] diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 3941428e..215a2ba7 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -24,7 +24,7 @@ from ...containers import MoleculeContainer from ...containers.bonds import Bond from ...exceptions import ValenceError, IsChiral, NotChiral -from ...periodictable import H +from ...periodictable import H as _H try: @@ -33,6 +33,9 @@ from importlib_resources import files, as_file +H = 1 + + def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> MoleculeContainer: """ INCHI string parser @@ -46,15 +49,23 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo raise ValueError('invalid INCHI') atoms, bonds = [], [] + protium = {} + deuterium = {} + tritium = {} seen = set() for n in range(structure.num_atoms): seen.add(n) atom = structure.atom[n] - atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, + 
atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, - 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, 't': atom.implicit_tritium}) + 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope}) + if atom.implicit_protium: + protium[n] = atom.implicit_protium + if atom.implicit_deuterium: + deuterium[n] = atom.implicit_deuterium + if atom.implicit_tritium: + tritium[n] = atom.implicit_tritium for k in range(atom.num_bonds): m = atom.neighbor[k] @@ -82,8 +93,9 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo lib.FreeStructFromINCHI(byref(structure)) - tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': stereo_atoms, 'stereo_allenes': stereo_allenes, 'log': [], - 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), 'title': None, 'meta': None} + tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': stereo_atoms, 'stereo_allenes': stereo_allenes, + 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), + 'protium': protium, 'deuterium': deuterium, 'tritium': tritium} mol = create_molecule(tmp, skip_calc_implicit=True, _cls=_cls) postprocess_molecule(mol, tmp, ignore_stereo=ignore_stereo) return mol @@ -95,38 +107,27 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): # set hydrogen atoms. INCHI designed for hydrogens handling. hope correctly. free = count(len(atoms) + 1) - for n, atom in enumerate(data['atoms'], 1): - if atom['element'] != 'H': - atoms[n]._implicit_hydrogens = atom['hydrogens'] - # in chython hydrogens never have implicit H. - elif atom['hydrogens']: # >[xH]-H case - m = next(free) - charges[m] = 0 - radicals[m] = False - plane[m] = (0., 0.) 
- hydrogens[n] = 0 - hydrogens[m] = 0 - atoms[m] = a = H() - a._attach_graph(molecule, m) + to_add = [] + for n, atom in atoms.items(): + # in chython hydrogens never have implicit H. convert to explicit + if atom == H and atom.implicit_hydrogens: + for _ in range(atom.implicit_hydrogens): + to_add.append((n, next(free), _H(implicit_hydrogens=0))) + atom._implicit_hydrogens = 0 + + for n, p in data['protium'].items(): + to_add.append((n + 1, next(free), _H(isotope=1, implicit_hydrogens=0))) + for n, p in data['deuterium'].items(): + to_add.append((n + 1, next(free), _H(isotope=2, implicit_hydrogens=0))) + for n, p in data['tritium'].items(): + to_add.append((n + 1, next(free), _H(isotope=3, implicit_hydrogens=0))) + + if to_add: + for n, m, a in to_add: + atoms[m] = a bonds[n][m] = b = Bond(1) bonds[m] = {n: b} - b._attach_graph(molecule, n, m) - else: # H+, H* or >H-[xH] cases - hydrogens[n] = 0 - # convert isotopic implicit hydrogens to explicit - for i, k in enumerate(('p', 'd', 't'), 1): - if atom[k]: - for _ in range(atom[k]): - m = next(free) - charges[m] = 0 - radicals[m] = False - plane[m] = (0., 0.) 
- hydrogens[m] = 0 - atoms[m] = a = H(i) - a._attach_graph(molecule, m) - bonds[n][m] = b = Bond(1) - bonds[m] = {n: b} - b._attach_graph(molecule, n, m) + molecule.calc_labels() # reset labels if ignore_stereo or not data['stereo_atoms'] and not data['stereo_cumulenes'] and not data['stereo_allenes']: return From 90176ac8c1404b6cb76fdcb36bcc1532f51045e5 Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 20 Nov 2024 19:51:49 +0100 Subject: [PATCH 34/68] fixes --- chython/containers/reaction.py | 6 +++--- chython/files/MRVrw.py | 1 - chython/files/_mdl/emol.py | 6 +++--- chython/files/_mdl/mol.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index bbb6509f..e34185e8 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -277,7 +277,7 @@ def __format__(self, format_spec): sig = [] count = 0 contract = [] - orders = [] + radicals = [] for ml in (self.__reactants, self.__reagents, self.__products): mso = [(m, *m.__format__(format_spec, _return_order=True)) for m in ml] @@ -292,13 +292,13 @@ def __format__(self, format_spec): else: count += 1 - orders.append((m, o)) + radicals.extend(m.atom(n).is_radical for n in o) ss.append(s) sig.append('.'.join(ss)) if not format_spec or '!x' not in format_spec: cx = [] - if r := ','.join(str(n) for n, (m, a) in enumerate((m, a) for m, o in orders for a in o) if m._radicals[a]): + if r := ','.join(str(n) for n, r in enumerate(radicals) if r): cx.append(f'^1:{r}') if contract: cx.append(f"f:{','.join('.'.join(x) for x in contract)}") diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index 3c808746..ab969b21 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -29,7 +29,6 @@ from ..exceptions import EmptyMolecule, EmptyReaction -organic_set = {'B', 'C', 'N', 'O', 'P', 'S', 'Se', 'F', 'Cl', 'Br', 'I'} bond_map = {8: '1" queryType="Any', 4: 'A', 1: '1', 2: '2', 3: '3', 'Any': 8, 'any': 8, 'A': 4, 
'a': 4, '1': 1, '2': 2, '3': 3} diff --git a/chython/files/_mdl/emol.py b/chython/files/_mdl/emol.py index 03b15a6a..a5a5475b 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/_mdl/emol.py @@ -149,13 +149,13 @@ def parse_mol_v3000(data, *, _header=True): drop = True for line in data[3 + atom_count + bonds_count:]: - if line.startswith('M V30 END CTAB'): + if line.startswith('END CTAB'): break elif drop: - if line.startswith('M V30 BEGIN SGROUP'): + if line.startswith('BEGIN SGROUP'): drop = False continue - elif line.startswith('M V30 END SGROUP'): + elif line.startswith('END SGROUP'): break _, _type, i, *kvs = split(line) diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index db819f2b..93913a89 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -122,7 +122,7 @@ def parse_mol_v2000(data): log.append(f'ignored line: {line}') for a in atoms: - if a['is_radical']: # int to bool + if 'is_radical' in a: # int to bool a['is_radical'] = True for x in dat.values(): try: From 6b5d5418cfcf176e30a09a95f41c4faebe85812a Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 20 Nov 2024 21:14:35 +0100 Subject: [PATCH 35/68] Refactor container attribute access and remove unused methods Replaced double underscore attributes with single underscore to simplify access. Removed hashed bytes representation method and streamlined `pack` and `unpack` functions across multiple classes by adding shorthand equivalents `pach` and `unpach`. Additionally, updated copyrights and deleted unnecessary import and cache flushes. 
--- chython/algorithms/mapping/attention.py | 3 +- chython/algorithms/smiles.py | 5 - chython/algorithms/standardize/reaction.py | 21 ++-- chython/containers/molecule.py | 13 +++ chython/containers/reaction.py | 119 ++++++++++----------- 5 files changed, 78 insertions(+), 83 deletions(-) diff --git a/chython/algorithms/mapping/attention.py b/chython/algorithms/mapping/attention.py index e8c75ff0..bc4e870a 100644 --- a/chython/algorithms/mapping/attention.py +++ b/chython/algorithms/mapping/attention.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # @@ -33,6 +33,7 @@ class Attention: __slots__ = () + __class_cache__ = {} def reset_mapping(self: Union['ReactionContainer', 'Attention'], *, return_score: bool = False, multiplier=1.75, keep_reactants_numbering=False) -> Union[bool, float]: diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index fecbae0d..5f463035 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -21,7 +21,6 @@ from CachedMethods import cached_method from collections import defaultdict from functools import cached_property -from hashlib import sha512 from heapq import heappop, heappush from itertools import product from random import random @@ -148,10 +147,6 @@ def __eq__(self, other): def __hash__(self): return hash(str(self)) - @cached_method - def __bytes__(self): - return sha512(str(self).encode()).digest() - @cached_property def smiles_atoms_order(self) -> Tuple[int, ...]: """ diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 1cb20f28..8f5ab282 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -272,10 +272,9 @@ def __remove_reagents_rules(self: 'ReactionContainer', keep_reagents): tmp.extend(reagents_st2) reagents = tuple(tmp) if keep_reagents 
else () - self._ReactionContainer__reactants = tuple(reactants_st2) - self._ReactionContainer__products = tuple(products_st2) - self._ReactionContainer__reagents = reagents - self.flush_cache() + self._reactants = tuple(reactants_st2) + self._products = tuple(products_st2) + self._reagents = reagents self.fix_positions() return True @@ -307,10 +306,9 @@ def __remove_reagents_mapping(self: 'ReactionContainer', keep_reagents): reagents = tuple(tmp) if keep_reagents else () if len(reactants) != len(self.reactants) or len(products) != len(self.products) or len(reagents) != len(self.reagents): - self._ReactionContainer__reactants = tuple(reactants) - self._ReactionContainer__products = tuple(products) - self._ReactionContainer__reagents = reagents - self.flush_cache() + self._reactants = tuple(reactants) + self._products = tuple(products) + self._reagents = reagents self.fix_positions() return True return False @@ -327,7 +325,7 @@ def contract_ions(self: 'ReactionContainer') -> bool: salts = _contract_ions(anions, cations, total) if salts: neutral.extend(salts) - self._ReactionContainer__reagents = tuple(neutral) + self._reagents = tuple(neutral) changed = True else: changed = False @@ -338,7 +336,7 @@ def contract_ions(self: 'ReactionContainer') -> bool: anions_order = {frozenset(m): n for n, m in enumerate(anions)} cations_order = {frozenset(m): n for n, m in enumerate(cations)} neutral.extend(salts) - self._ReactionContainer__reactants = tuple(neutral) + self._reactants = tuple(neutral) changed = True else: anions_order = cations_order = {} @@ -350,11 +348,10 @@ def contract_ions(self: 'ReactionContainer') -> bool: salts = _contract_ions(anions, cations, total) if salts: neutral.extend(salts) - self._ReactionContainer__products = tuple(neutral) + self._products = tuple(neutral) changed = True if changed: - self.flush_cache() self.fix_positions() return True return False diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 
e6263811..5ac7fb43 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -527,6 +527,9 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non return compress(data, 9) return data + def pach(self, *, compressed=True, check=True, version=2, order: List[int] = None) -> bytes: + return self.pack(compressed=compressed, check=check, version=version, order=order) + @classmethod def pack_len(cls, data: bytes, /, *, compressed=True) -> int: """ @@ -586,6 +589,16 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, return mol, pack_length return mol + @classmethod + def unpach(cls, data: Union[bytes, memoryview], /, *, compressed=True) -> 'MoleculeContainer': + """ + Unpack from compressed bytes. + """ + return cls.unpack(data, compressed=compressed) + + def __bytes__(self): + return self.pack() + def _cpack(self, order=None, check=True): if order is None: order = list(self._atoms) diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index e34185e8..2d154924 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,11 +18,10 @@ # from CachedMethods import cached_method from functools import reduce -from hashlib import sha512 from itertools import chain from math import ceil from operator import itemgetter, or_ -from typing import Dict, Iterable, Iterator, Optional, Tuple, List +from typing import Dict, Iterator, Optional, Tuple, List, Sequence from zlib import compress, decompress from .cgr import CGRContainer from .molecule import MoleculeContainer @@ -38,11 +37,10 @@ class ReactionContainer(StandardizeReaction, Mapping, Calculate2DReaction, Depic Reaction storage hashable and comparable. 
based on reaction unique signature (SMILES). """ - __slots__ = ('__reactants', '__products', '__reagents', '__meta', '__name', '_arrow', '_signs', '__dict__') - __class_cache__ = {} + __slots__ = ('_reactants', '_products', '_reagents', '_meta', '_name', '_arrow', '_signs', '__dict__') - def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterable[MoleculeContainer] = (), - reagents: Iterable[MoleculeContainer] = (), meta: Optional[Dict] = None, name: Optional[str] = None): + def __init__(self, reactants: Sequence[MoleculeContainer] = (), products: Sequence[MoleculeContainer] = (), + reagents: Sequence[MoleculeContainer] = (), meta: Optional[Dict] = None, name: Optional[str] = None): """ New reaction object creation @@ -60,15 +58,15 @@ def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterab elif not all(isinstance(x, MoleculeContainer) for x in chain(reactants, products, reagents)): raise TypeError(f'MoleculeContainers expected') - self.__reactants = reactants - self.__products = products - self.__reagents = reagents + self._reactants = reactants + self._products = products + self._reagents = reagents if meta is None: - self.__meta = None + self._meta = None else: - self.__meta = dict(meta) + self._meta = dict(meta) if name is None: - self.__name = None + self._name = None else: self.name = name self._arrow = None @@ -76,21 +74,21 @@ def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterab @property def reactants(self) -> Tuple[MoleculeContainer, ...]: - return self.__reactants + return self._reactants @property def reagents(self) -> Tuple[MoleculeContainer, ...]: - return self.__reagents + return self._reagents @property def products(self) -> Tuple[MoleculeContainer, ...]: - return self.__products + return self._products def molecules(self) -> Iterator[MoleculeContainer]: """ Iterator of all reaction molecules """ - return chain(self.__reactants, self.__reagents, self.__products) + return 
chain(self.reactants, self.reagents, self.products) @property def meta(self) -> Dict: @@ -98,33 +96,33 @@ def meta(self) -> Dict: Dictionary of metadata. Like DTYPE-DATUM in RDF """ - if self.__meta is None: - self.__meta = {} # lazy - return self.__meta + if self._meta is None: + self._meta = {} # lazy + return self._meta @property def name(self) -> str: - return self.__name or '' + return self._name or '' @name.setter def name(self, name: str): if not isinstance(name, str): raise TypeError('name should be string up to 80 symbols') - self.__name = name + self._name = name def copy(self) -> 'ReactionContainer': """ Get copy of object """ copy = object.__new__(self.__class__) - copy._ReactionContainer__reactants = tuple(x.copy() for x in self.__reactants) - copy._ReactionContainer__products = tuple(x.copy() for x in self.__products) - copy._ReactionContainer__reagents = tuple(x.copy() for x in self.__reagents) - copy._ReactionContainer__name = self.__name - if self.__meta is None: - copy._ReactionContainer__meta = None + copy._reactants = tuple(x.copy() for x in self.reactants) + copy._products = tuple(x.copy() for x in self.products) + copy._reagents = tuple(x.copy() for x in self.reagents) + copy._name = self._name + if self._meta is None: + copy._meta = None else: - copy._ReactionContainer__meta = self.__meta.copy() + copy._meta = self._meta.copy() copy._arrow = self._arrow copy._signs = self._signs return copy @@ -137,23 +135,23 @@ def compose(self) -> CGRContainer: Reagents will be presented as unchanged molecules :return: CGRContainer """ - rr = self.__reagents + self.__reactants + rr = self.reagents + self.reactants if rr: r = reduce(or_, rr) else: r = MoleculeContainer() - if self.__products: - p = reduce(or_, self.__products) + if self.products: + p = reduce(or_, self.products) else: p = MoleculeContainer() return r ^ p - def flush_cache(self): + def flush_cache(self, **kwargs): self.__dict__.clear() for m in self.molecules(): - m.flush_cache() + 
m.flush_cache(**kwargs) - def pack(self, *, compressed=True, check=True): + def pack(self, *, compressed=True, check=True) -> bytes: """ Pack into compressed bytes. @@ -172,12 +170,18 @@ def pack(self, *, compressed=True, check=True): :param compressed: return zlib-compressed pack. :param check: check molecules for format restrictions. """ - data = b''.join((bytearray((1, len(self.__reactants), len(self.__reagents), len(self.__products))), + data = b''.join((bytearray((1, len(self.reactants), len(self.reagents), len(self.products))), *(m.pack(compressed=False, check=check) for m in self.molecules()))) if compressed: return compress(data, 9) return data + def pach(self, *, compressed=True, check=True) -> bytes: + """ + Pack into compressed bytes. + """ + return self.pack(compressed=compressed, check=check) + @classmethod def pack_len(cls, data: bytes, /, *, compressed=True) -> Tuple[List[int], List[int], List[int]]: """ @@ -225,7 +229,7 @@ def unpack(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': raise ValueError('invalid pack header') reactants, reagents, products = data[1], data[2], data[3] - molecules = [] + molecules: List[MoleculeContainer] = [] shift = 4 for _ in range(reactants + reagents + products): m, pl = MoleculeContainer.unpack(data[shift:], compressed=False, _return_pack_length=True) @@ -233,6 +237,16 @@ def unpack(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': shift += pl return cls(molecules[:reactants], molecules[-products:], molecules[reactants: -products]) + @classmethod + def unpach(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': + """ + Unpack from compressed bytes. 
+ """ + return cls.unpack(data, compressed=compressed) + + def __bytes__(self): + return self.pack() + def __invert__(self) -> CGRContainer: """ Get CGR of reaction @@ -246,15 +260,11 @@ def __eq__(self, other): def __hash__(self): return hash(str(self)) - @cached_method - def __bytes__(self): - return sha512(str(self).encode()).digest() - def __bool__(self): """ Exists both reactants and products """ - return bool(self.__reactants and self.__products) + return bool(self.reactants and self.products) @cached_method def __str__(self): @@ -279,7 +289,7 @@ def __format__(self, format_spec): contract = [] radicals = [] - for ml in (self.__reactants, self.__reagents, self.__products): + for ml in (self.reactants, self.reagents, self.products): mso = [(m, *m.__format__(format_spec, _return_order=True)) for m in ml] if not format_spec or '!c' not in format_spec: mso.sort(key=itemgetter(1)) @@ -306,29 +316,8 @@ def __format__(self, format_spec): return f"{'>'.join(sig)} |{','.join(cx)}|" return '>'.join(sig) - @cached_method def __len__(self): - return len(self.__reactants) + len(self.__products) + len(self.__reagents) - - def __getstate__(self): - state = {'reactants': self.__reactants, 'products': self.__products, 'reagents': self.__reagents, - 'meta': self.__meta, 'name': self.__name, 'arrow': self._arrow, 'signs': self._signs} - from chython import pickle_cache - - if pickle_cache: - state['cache'] = self.__dict__ - return state - - def __setstate__(self, state): - self.__reactants = state['reactants'] - self.__products = state['products'] - self.__reagents = state['reagents'] - self.__meta = state['meta'] - self.__name = state['name'] - self._arrow = state['arrow'] - self._signs = state['signs'] - if 'cache' in state: - self.__dict__.update(state['cache']) + return len(self.reactants) + len(self.products) + len(self.reagents) __all__ = ['ReactionContainer'] From 3ab3629273e3bfbc2bb7271bd88c6aec4382454c Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 22 Nov 2024 
08:41:43 +0100 Subject: [PATCH 36/68] saved --- chython/reactor/base.py | 6 +++++- chython/reactor/reactor.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/chython/reactor/base.py b/chython/reactor/base.py index 16f8b918..fae981bc 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -21,7 +21,7 @@ from itertools import product from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement +from ..periodictable import Element, ListElement, AnyElement, QueryElement class BaseReactor: @@ -34,6 +34,10 @@ def __init__(self, reactants, products, delete_atoms, fix_rings, fix_tautomers): self.__variable = variable = [] atoms = defaultdict(dict) + if isinstance(products, MoleculeContainer): + # full replacement of atoms + for n, atom in products.atoms(): + elements[n] = atom.copy(hydrogens=True, stereo=True) for n, atom in products.atoms(): atoms[n].update(charge=atom.charge, is_radical=atom.is_radical) if atom.atomic_number: # replace atom diff --git a/chython/reactor/reactor.py b/chython/reactor/reactor.py index 08cb024c..ce74a6d1 100644 --- a/chython/reactor/reactor.py +++ b/chython/reactor/reactor.py @@ -69,7 +69,7 @@ def __init__(self, patterns: Tuple[QueryContainer, ...], self.__polymerise_limit = polymerise_limit self.__products_atoms = tuple(set(m) for m in products) self.__automorphism_filter = automorphism_filter - super().__init__({n for x in patterns for n, h in x._masked.items() if not h}, reduce(or_, products), + super().__init__({n for x in patterns for n, a in x.atoms() if not a.masked}, reduce(or_, products), delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, *structures: MoleculeContainer): From 41e2b1fcb9f4764d66516001073c772a2b7034c2 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 10:00:36 +0100 Subject: [PATCH 37/68] fixes. removed overoptimizations. 
--- chython/algorithms/depict.py | 2 +- chython/algorithms/fingerprints/__init__.py | 2 +- chython/algorithms/isomorphism.py | 4 ++-- chython/algorithms/mcs.py | 4 ++-- chython/algorithms/morgan.py | 8 ++++---- chython/algorithms/standardize/molecule.py | 18 ++++++++---------- chython/algorithms/standardize/resonance.py | 6 +++--- chython/algorithms/standardize/saturation.py | 8 ++++---- chython/algorithms/stereo.py | 8 ++++---- chython/algorithms/x3dom.py | 4 ++-- chython/containers/graph.py | 4 ++-- chython/containers/molecule.py | 4 ++-- chython/files/_mdl/emol.py | 2 +- chython/files/_mdl/erxn.py | 2 +- chython/files/_mdl/mol.py | 2 +- chython/files/_mdl/rxn.py | 2 +- chython/files/_mdl/write.py | 8 ++++---- chython/files/libinchi/wrapper.py | 4 ++-- 18 files changed, 45 insertions(+), 47 deletions(-) diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 73cf2319..b0819b03 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -351,7 +351,7 @@ def __render_atoms(self: 'MoleculeContainer', uid): define = [] mask = [] - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): x, y = atom.x, -atom.y symbol = atom.atomic_symbol if (symbol != 'C' or atom.charge or atom.is_radical or atom.isotope or carbon diff --git a/chython/algorithms/fingerprints/__init__.py b/chython/algorithms/fingerprints/__init__.py index 0f6febf1..ec2121fa 100644 --- a/chython/algorithms/fingerprints/__init__.py +++ b/chython/algorithms/fingerprints/__init__.py @@ -32,7 +32,7 @@ class Fingerprints(LinearFingerprint, MorganFingerprint): @property def _atom_identifiers(self: 'MoleculeContainer'): return {idx: hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.is_radical)) - for idx, atom in self._atoms.items()} + for idx, atom in self.atoms()} class FingerprintsCGR(LinearFingerprint, MorganFingerprint): diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 2a64bdf8..43586504 100644 --- 
a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -190,7 +190,7 @@ def _cython_compiled_structure(self: 'MoleculeContainer'): bits2 = [] bits3 = [] bits4 = [] - for i, (n, a) in enumerate(self._atoms.items()): + for i, (n, a) in enumerate(self.atoms()): mapping[n] = i numbers.append(n) v2 = 1 << (a.hybridization - 1) @@ -318,7 +318,7 @@ def get_mapping(query, scope): for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope): - for n, a in self._atoms.items(): + for n, a in self.atoms(): if a.stereo is None: continue m = mapping[n] diff --git a/chython/algorithms/mcs.py b/chython/algorithms/mcs.py index 437d2dcf..8e1bf41b 100644 --- a/chython/algorithms/mcs.py +++ b/chython/algorithms/mcs.py @@ -97,10 +97,10 @@ def __get_product(self: 'molecule.MoleculeContainer', other: 'molecule.MoleculeC o_bonds = other._bonds s_equal = defaultdict(list) # equal self atoms - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): s_equal[atom].append(n) p_equal = defaultdict(list) # equal other atoms - for n, atom in other._atoms.items(): + for n, atom in other.atoms(): p_equal[atom].append(n) full_product = {} diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index e200cbc3..c56b5572 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -40,12 +40,12 @@ def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: :return: dict of atom-order pairs """ - if not self._atoms: # for empty containers + if not self: # for empty containers return {} - elif len(self._atoms) == 1: # optimize single atom containers - return dict.fromkeys(self._atoms, 1) + elif len(self) == 1: # optimize single atom containers + return dict.fromkeys(self, 1) ring = self.ring_atoms - return _morgan({n: hash((hash(a), n in ring)) for n, a in self._atoms.items()}, self.int_adjacency) + return _morgan({n: hash((hash(a), n in ring)) for n, a in self.atoms()}, 
self.int_adjacency) @cached_property def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 5f4e5e77..92840c9b 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -55,7 +55,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F h, changed = self.implicify_hydrogens(_fix_stereo=False, logging=True) if fix_tautomers and (logging or keep_kekule): # thiele can change tautomeric form - hgs = {n: a.implicit_hydrogens for n, a in self._atoms.items()} + hgs = {n: a.implicit_hydrogens for n, a in self.atoms()} if keep_kekule: # save bond orders bonds = [(b, b.order) for _, _, b in self.bonds()] @@ -66,7 +66,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F if keep_kekule and t: # restore # check ring charge/hydrogen moving - if c or fix_tautomers and hgs != self._hydrogens: # noqa + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self.atoms()): self.kekule() # we need to do full kekule again else: for b, o in bonds: # noqa @@ -81,12 +81,12 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F s.append((tuple(changed), -1, 'implicified')) if t: s.append(((), -1, 'aromatized')) - if fix_tautomers and (x := tuple(n for n, a in self._atoms.items() if hgs[n] != a.implicit_hydrogens)): + if fix_tautomers and (x := tuple(n for n, a in self.atoms() if hgs[n] != a.implicit_hydrogens)): s.append((x, -1, 'aromatic tautomer found')) if c: s.append((tuple(c), -1, 'recharged')) if keep_kekule and t: - if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self._atoms.items()): + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self.atoms()): s.append(((), -1, 'kekulized again')) else: s.append(((), -1, 'kekule form restored')) @@ -123,7 +123,7 @@ 
def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=Fals log.extend(l) fixed.update(f) - if b := fixed.intersection(n for n, a in self._atoms.items() if a.implicit_hydrogens is None): + if b := fixed.intersection(n for n, a in self.atoms() if a.implicit_hydrogens is None): if ignore: log.append((tuple(b), -1, 'standardization failed')) else: @@ -271,17 +271,15 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, :param keep_to_terminal: Keep any bonds to terminal hydrogens :return: removed bonds count """ - bonds = self._bonds - ab = [(n, m) for n, m, b in self.bonds() if b == 8] if keep_to_terminal: skeleton = self.not_special_connectivity - hs = {n for n, a in self._atoms.items() if a == H and not skeleton[n]} + hs = {n for n, a in self.atoms() if a == H and not skeleton[n]} ab = [(n, m) for n, m in ab if n not in hs and m not in hs] for n, m in ab: - del bonds[n][m], bonds[m][n] + self.delete_bond(n, m, _skip_calculation=True) if ab: self.flush_cache(keep_sssr=True) @@ -404,7 +402,7 @@ def check_valence(self: 'MoleculeContainer') -> List[int]: :return: list of invalid atoms """ # only invalid atoms have None hydrogens. 
- return [n for n, a in self._atoms.items() if a.implicit_hydrogens is None] + return [n for n, a in self.atoms() if a.implicit_hydrogens is None] def clean_isotopes(self: 'MoleculeContainer') -> bool: """ diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 2283540f..593b4ef5 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -135,7 +135,7 @@ def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, def __entries(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds - errors = {n for n, a in atoms.items() if a.implicit_hydrogens is None} + errors = {n for n, a in self.atoms() if a.implicit_hydrogens is None} transfer = set() entries = set() @@ -144,7 +144,7 @@ def __entries(self: 'MoleculeContainer'): nitrogen_cat = set() nitrogen_ani = set() sulfur_cat = set() - for n, a in atoms.items(): + for n, a in self.atoms(): if a not in (B, C, N, O, Si, P, S, As, Se, Te): # filter non-organic set, halogens and aromatics continue @@ -182,7 +182,7 @@ def __entries(self: 'MoleculeContainer'): transfer.add(n) if exits or entries: # try to move cation to nitrogen. saturation fixup. 
- for n, a in self._atoms.items(): + for n, a in self.atoms(): if a == N and not a.charge: if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) diff --git a/chython/algorithms/standardize/saturation.py b/chython/algorithms/standardize/saturation.py index 38c5bb1e..03fa9c1e 100644 --- a/chython/algorithms/standardize/saturation.py +++ b/chython/algorithms/standardize/saturation.py @@ -76,11 +76,11 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, expected_charge = int(self) if reset_electrons: - charges = {x: None for x in self._atoms} - radicals = {x: None for x in self._atoms} + charges = {x: None for x in self} + radicals = {x: None for x in self} else: - charges = {n: a.charge for n, a in self._atoms.items()} - radicals = {n: a.is_radical for n, a in self._atoms.items()} + charges = {n: a.charge for n, a in self.atoms()} + radicals = {n: a.is_radical for n, a in self.atoms()} sat, adjacency = _find_possible_valences(atoms, neighbors_distances or self._bonds, charges, radicals, neighbors_distances is not None) charges = {} # new charge states diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index e243d6a2..1be70e3a 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -168,7 +168,7 @@ def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: Carbon sp3 atom numbers. 
""" tetra = [] - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): if atom == C and not atom.charge and not atom.is_radical: env = self._bonds[n] if all(b == 1 for b in env.values()): @@ -577,7 +577,7 @@ def fix_stereo(self: 'MoleculeContainer'): atoms_stereo = [] allenes_stereo = [] cis_trans_stereo = [] - for n, a in self._atoms.items(): + for n, a in self.atoms(): if a.stereo is None: continue elif n in stereo_tetrahedrons: @@ -960,7 +960,7 @@ def __wedge_sign(self: 'MoleculeContainer', order): @cached_property def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: - stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} if not stereo_atoms and not stereo_bonds: return self.atoms_order @@ -1103,7 +1103,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): chiral_c.add(n) # skip already marked. 
- stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} chiral_t.difference_update(stereo_atoms) chiral_a.difference_update(stereo_atoms) diff = set() diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index 9d59160d..73779280 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -181,7 +181,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): atoms = [] if carbon: - for n, a in self._atoms.items(): + for n, a in self.atoms(): r = radius or a.atomic_radius * multiplier fr = r * 0.71 atoms.append(f" \n" @@ -197,7 +197,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): f" \n \n" " \n \n \n \n") else: - for n, a in self._atoms.items(): + for n, a in self.atoms(): r = radius or a.atomic_radius * multiplier atoms.append(f" \n" " \n \n" diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 4586969e..f644ebb6 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -122,7 +122,7 @@ def copy(self): copy of graph """ copy = object.__new__(self.__class__) - copy._atoms = {n: atom.copy(full=True) for n, atom in self._atoms.items()} + copy._atoms = {n: atom.copy(full=True) for n, atom in self.atoms()} copy._bonds = cb = {} for n, m_bond in self._bonds.items(): cb[n] = cbn = {} @@ -144,7 +144,7 @@ def remap(self, mapping: Dict[int, int]): raise ValueError('mapping overlap') mg = mapping.get - self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} + self._atoms = {mg(n, n): atom for n, atom in self.atoms()} self._bonds = {mg(n, n): {mg(m, m): bond for m, bond in m_bond.items()} for n, m_bond in self._bonds.items()} self.flush_cache() diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 5ac7fb43..a8bd6094 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -292,11 +292,11 @@ def substructure(self, atoms: Iterable[int], *, 
as_query: bool = False, recalcul raise ValueError('empty atoms list not allowed') if set(atoms) - self._atoms.keys(): raise ValueError('invalid atom numbers') - atoms = tuple(n for n in self._atoms if n in atoms) # save original order + atoms = tuple(n for n in self if n in atoms) # save original order if as_query: sub = object.__new__(QueryContainer) - lost = {n for n, a in self._atoms.items() if a != H} - set(atoms) # atoms not in substructure + lost = {n for n, a in self.atoms() if a != H} - set(atoms) # atoms not in substructure # atoms with fully present neighbors not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} diff --git a/chython/files/_mdl/emol.py b/chython/files/_mdl/emol.py index a5a5475b..e8390a9c 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/_mdl/emol.py @@ -21,7 +21,7 @@ def parse_mol_v3000(data, *, _header=True): if _header: - title = data[1].strip() or None + title = data[0].strip() or None data = data[4:] else: title = None diff --git a/chython/files/_mdl/erxn.py b/chython/files/_mdl/erxn.py index 6b707b3e..d088cabe 100644 --- a/chython/files/_mdl/erxn.py +++ b/chython/files/_mdl/erxn.py @@ -29,7 +29,7 @@ def parse_rxn_v3000(data, *, ignore=True): if not reagents_count: raise EmptyReaction - title = data[2].strip() or None + title = data[1].strip() or None log = [] molecules = [] diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index 93913a89..88b21373 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -32,7 +32,7 @@ def parse_mol_v2000(data): raise EmptyMolecule log = [] - title = data[1].strip() or None + title = data[0].strip() or None atoms = [] bonds = [] stereo = [] diff --git a/chython/files/_mdl/rxn.py b/chython/files/_mdl/rxn.py index 50df40e4..56977fe1 100644 --- a/chython/files/_mdl/rxn.py +++ b/chython/files/_mdl/rxn.py @@ -29,7 +29,7 @@ def parse_rxn_v2000(data, *, ignore=True): if not reagents_count: raise EmptyReaction - title = data[2].strip() or None + title = 
data[1].strip() or None log = [] molecules = [] diff --git a/chython/files/_mdl/write.py b/chython/files/_mdl/write.py index 3319c60d..a998251e 100644 --- a/chython/files/_mdl/write.py +++ b/chython/files/_mdl/write.py @@ -82,7 +82,7 @@ def _write_molecule(self, g, write3d=None): file = self._file file.write(f'M V30 BEGIN CTAB\nM V30 COUNTS {g.atoms_count} {g.bonds_count} 0 0 0\nM V30 BEGIN ATOM\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (m, a) in enumerate(g.atoms(), start=1): if write3d is not None: x, y, z = xyz[m] z = f'{z:.4f}' @@ -131,7 +131,7 @@ def _write_molecule(self, g, write3d=None): file = self._file file.write(f'{g.name}\n\n\n{g.atoms_count:3d}{g.bonds_count:3d} 0 0 0 0 999 V2000\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (m, a) in enumerate(g.atoms(), start=1): if write3d is not None: x, y, z = xyz[m] else: @@ -142,7 +142,7 @@ def _write_molecule(self, g, write3d=None): m = 0 file.write(f'{x:10.4f}{y:10.4f}{z:10.4f} {a.atomic_symbol:3s} 0{c} 0 0 0 0 0 0 0{m:3d} 0 0\n') - atoms = {m: n for n, m in enumerate(g._atoms, start=1)} + atoms = {m: n for n, m in enumerate(g, start=1)} wedge = defaultdict(set) for n, m, s in g._wedge_map: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {bonds[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') @@ -152,7 +152,7 @@ def _write_molecule(self, g, write3d=None): if m not in wedge[n]: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {b.order} 0 0 0 0\n') - for n, a in enumerate(g._atoms.values(), start=1): + for n, (_, a) in enumerate(g.atoms(), start=1): if a.isotope: file.write(f'M ISO 1 {n:3d} {a.isotope:3d}\n') if a.is_radical: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 215a2ba7..8d583fb6 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -201,8 +201,8 @@ def isotope(self): @property def delta_isotope(self): - if self.isotope > 9000: - return self.isotope - 10_000 + if self.isotopic_mass > 9000: + 
return self.isotopic_mass - 10_000 @property def is_radical(self): From 41e1486778d58677104a68060c84261ae3267142 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 10:52:18 +0100 Subject: [PATCH 38/68] removed overoptimizations. --- chython/algorithms/depict.py | 26 ++++++++++------------ chython/algorithms/isomorphism.py | 4 ++-- chython/algorithms/standardize/molecule.py | 2 +- chython/algorithms/stereo.py | 7 +++--- chython/containers/molecule.py | 10 ++++----- chython/utils/grid.py | 23 ++++++++++--------- chython/utils/retro.py | 19 ++++++++-------- 7 files changed, 45 insertions(+), 46 deletions(-) diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index b0819b03..5b00406b 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -206,17 +206,16 @@ def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, he :param clean2d: calculate coordinates if necessary. """ uid = str(uuid4()) - atoms = self._atoms.values() - min_x = min(a.x for a in atoms) - max_x = max(a.x for a in atoms) - min_y = min(a.y for a in atoms) - max_y = max(a.y for a in atoms) + min_x = min(a.x for _, a in self.atoms()) + max_x = max(a.x for _, a in self.atoms()) + min_y = min(a.y for _, a in self.atoms()) + max_y = max(a.y for _, a in self.atoms()) if clean2d and len(self) > 1 and max_y - min_y < .01 and max_x - min_x < 0.01: self.clean2d() - min_x = min(a.x for a in atoms) - max_x = max(a.x for a in atoms) - min_y = min(a.y for a in atoms) - max_y = max(a.y for a in atoms) + min_x = min(a.x for _, a in self.atoms()) + max_x = max(a.x for _, a in self.atoms()) + min_y = min(a.y for _, a in self.atoms()) + max_y = max(a.y for _, a in self.atoms()) bonds = self.__render_bonds() atoms, define, masks = self.__render_atoms(uid) @@ -455,11 +454,10 @@ def depict(self: 'ReactionContainer', *, width=None, height=None, clean2d: bool if clean2d: for m in self.molecules(): if len(m) > 1: - atoms = m._atoms.values() - 
min_x = min(a.x for a in atoms) - max_x = max(a.x for a in atoms) - min_y = min(a.y for a in atoms) - max_y = max(a.y for a in atoms) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() self.fix_positions() diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 43586504..23257e18 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -128,8 +128,8 @@ def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): Atom in Structure test. """ if isinstance(other, str): - return any(other == x.atomic_symbol for x in self._atoms.values()) - return any(other == x for x in self._atoms.values()) + return any(other == a.atomic_symbol for _, a in self.atoms()) + return any(other == a for _, a in self.atoms()) def is_automorphic(self): """ diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 92840c9b..65ce7e85 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -409,7 +409,7 @@ def clean_isotopes(self: 'MoleculeContainer') -> bool: Clean isotope marks from molecule. Return True if any isotope found. """ - isotopes = [x for x in self._atoms.values() if x.isotope] + isotopes = [a for _, a in self.atoms() if a.isotope] if isotopes: for i in isotopes: i._isotope = None diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index 1be70e3a..fd19fa75 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -155,11 +155,10 @@ def clean_stereo(self: 'MoleculeContainer'): """ Remove stereo data. 
""" - for a in self._atoms.values(): + for _, a in self.atoms(): a._stereo = None - for _, bs in self._bonds: - for b in bs.values(): - b._stereo = None # flush twice, but it should be still faster + for *_, b in self.bonds(): + b._stereo = None self.flush_cache(keep_sssr=True, keep_components=True) @cached_property diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index a8bd6094..984f845c 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -128,25 +128,25 @@ def molecular_charge(self) -> int: """ Total charge of molecule """ - return sum(a.charge for a in self._atoms.values()) + return sum(a.charge for _, a in self.atoms()) @cached_property def is_radical(self) -> bool: """ True if at least one atom is radical """ - return any(a.is_radical for a in self._atoms.values()) + return any(a.is_radical for _, a in self.atoms()) @cached_property def molecular_mass(self) -> float: h = _H().atomic_mass - return sum(a.atomic_mass + a.implicit_hydrogens * h for a in self._atoms.values()) + return sum(a.atomic_mass + a.implicit_hydrogens * h for _, a in self.atoms()) @cached_property def brutto(self) -> Dict[str, int]: """Counted atoms dict""" - c = Counter(a.atomic_symbol for a in self._atoms.values()) - c['H'] += sum(a.implicit_hydrogens for a in self._atoms.values()) + c = Counter(a.atomic_symbol for _, a in self.atoms()) + c['H'] += sum(a.implicit_hydrogens for _, a in self.atoms()) return dict(c) @cached_property diff --git a/chython/utils/grid.py b/chython/utils/grid.py index cc15d718..1a771718 100644 --- a/chython/utils/grid.py +++ b/chython/utils/grid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. 
# @@ -52,11 +52,10 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] if clean2d: for m in molecules: if len(m) > 1: - values = m._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() @@ -65,12 +64,12 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] for m in ms: if m is None: break - min_y = min(y for x, y in m._plane.values()) - max_y = max(y for x, y in m._plane.values()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) h = max_y - min_y if row_height < h: # get height of row row_height = h - planes.append(m._plane.copy()) + planes.append([a.xy for _, a in m.atoms()]) max_x = 0. for m in ms: @@ -88,8 +87,10 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] shift_y -= row_height + 4. * font_size # restore planes - for p, m in zip(planes, molecules): - m._plane = p + for m, p in zip(molecules, planes): + for (_, a), (x, y) in zip(m.atoms(), p): + a.x = x + a.y = y _width = shift_x - 1.5 * font_size _height = -shift_y - 1.5 * font_size diff --git a/chython/utils/retro.py b/chython/utils/retro.py index d94ec666..8fa1aaec 100644 --- a/chython/utils/retro.py +++ b/chython/utils/retro.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # Copyright 2021 Alexander Sizov # This file is part of chython. 
# @@ -66,22 +66,21 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle if clean2d: for m in column: if len(m) > 1: - values = m._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() - heights = [max(y for _, y in m._plane.values()) - min(y for _, y in m._plane.values()) for m in column] + heights = [max(a.y for _, a in m.atoms()) - min(a.y for _, a in m.atoms()) for m in column] y_shift = sum(heights) + y_gap * (len(heights) - 1) # column height with gaps if y_shift > c_max_y: c_max_y = y_shift y_shift /= 2. # center align for m, h in zip(column, heights): - plane = m._plane.copy() # backup + plane = [a.xy for _, a in m.atoms()] # backup mx = m._fix_plane_min(x_shift, -y_shift) if mx > c_max_x: c_max_x = mx @@ -92,7 +91,9 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle y_shift -= h + y_gap render.append(m.depict(_embedding=True)[:5]) - m._plane = plane # restore + for (_, a), (x, y) in zip(m.atoms(), plane): # restore + a.x = x + a.y = y x_shift = c_max_x + x_gap # between columns gap last_layer = current_layer From 534c983eeb62ef8ef9f37a1b63badfea01750ee2 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 10:59:34 +0100 Subject: [PATCH 39/68] fixed FWA --- chython/utils/free_wilson.py | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/chython/utils/free_wilson.py b/chython/utils/free_wilson.py index e836aa6d..9ee415c3 100644 --- a/chython/utils/free_wilson.py +++ b/chython/utils/free_wilson.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil 
Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -42,31 +42,31 @@ def fw_prepare_groups(core: Union[MoleculeContainer, QueryContainer], molecule: cs = set(core_map.values()) groups = molecule.substructure(molecule._atoms.keys() - cs, recalculate_hydrogens=False) gs = set(groups) - hs = molecule._hydrogens - hgs = groups._hydrogens - plane = molecule._plane cf = molecule.substructure(cs, recalculate_hydrogens=False) - chs = cf._hydrogens for n, m, b in molecule.bonds(): if n in cs: if m in gs: - h = H() - h._Core__isotope = reverse[n] # mark mapping to isotope - groups.add_bond(groups.add_atom(h, xy=plane[n]), m, b.copy()) - hgs[m] = hs[m] # restore H count - - cf.add_bond(cf.add_atom(h.copy(), xy=plane[m]), n, b.copy()) - chs[n] = hs[n] + a = molecule.atom(n) + h = H(x=a.x, y=a.y) + h._isotope = reverse[n] # mark mapping to isotope + groups.add_bond(groups.add_atom(h, _skip_calculation=True), m, b.copy(), _skip_calculation=True) + + a = molecule.atom(m) + h = H(x=a.x, y=a.y) + h._isotope = reverse[n] # mark mapping to isotope + cf.add_bond(cf.add_atom(h, _skip_calculation=True), n, b.copy(), _skip_calculation=True) elif m in cs and n in gs: - h = H() - h._Core__isotope = reverse[m] - groups.add_bond(groups.add_atom(h, xy=plane[m]), n, b.copy()) - hgs[n] = hs[n] - - cf.add_bond(cf.add_atom(h.copy(), xy=plane[n]), m, b.copy()) - chs[m] = hs[m] + a = molecule.atom(m) + h = H(x=a.x, y=a.y) + h._isotope = reverse[m] + groups.add_bond(groups.add_atom(h, _skip_calculation=True), n, b.copy(), _skip_calculation=True) + + a = molecule.atom(n) + h = H(x=a.x, y=a.y) + h._isotope = reverse[m] # mark mapping to isotope + cf.add_bond(cf.add_atom(h.copy(), _skip_calculation=True), n, b.copy(), _skip_calculation=True) groups = groups.split() groups.insert(0, cf) return groups From f3e302dbb20f28f02b9093c332da228643da7837 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 11:02:06 +0100 Subject: [PATCH 
40/68] cleaning --- chython/exceptions.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/chython/exceptions.py b/chython/exceptions.py index 891340fc..6f47d503 100644 --- a/chython/exceptions.py +++ b/chython/exceptions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -66,30 +66,6 @@ class InvalidAromaticRing(ValueError): """ -class IsConnectedAtom(Exception): - """ - Atom is already attached to graph - """ - - -class IsNotConnectedAtom(Exception): - """ - Atom is not attached to graph - """ - - -class IsConnectedBond(Exception): - """ - Bond is already attached to graph - """ - - -class IsNotConnectedBond(Exception): - """ - Bond is not attached to graph - """ - - class ValenceError(Exception): """ Atom has valence error From a12b4b35fe0e952331603a02094cf86372d02144 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 21:20:13 +0100 Subject: [PATCH 41/68] fixes. 
better cache management --- chython/algorithms/standardize/reaction.py | 16 ++++++++-------- chython/containers/bonds.py | 4 ++-- chython/containers/graph.py | 2 ++ chython/containers/reaction.py | 7 ++++--- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 8f5ab282..e6e5ddff 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -50,7 +50,7 @@ def canonicalize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) if logging: return total return bool(total) @@ -76,7 +76,7 @@ def standardize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging= total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) if logging: return total return bool(total) @@ -93,7 +93,7 @@ def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: if m.thiele(fix_tautomers=fix_tautomers): total = True if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: @@ -108,7 +108,7 @@ def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: if m.kekule(buffer_size=buffer_size): total = True if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def clean_isotopes(self: 'ReactionContainer') -> bool: @@ -121,7 +121,7 @@ def clean_isotopes(self: 'ReactionContainer') -> bool: if m.clean_isotopes(): flag = True if flag: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return flag def clean_stereo(self: 'ReactionContainer'): @@ -130,7 +130,7 @@ def clean_stereo(self: 'ReactionContainer'): """ for m in 
self.molecules(): m.clean_stereo() - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) def check_valence(self: 'ReactionContainer') -> List[Tuple[int, Tuple[int, ...]]]: """ @@ -155,7 +155,7 @@ def implicify_hydrogens(self: 'ReactionContainer') -> int: for m in self.molecules(): total += m.implicify_hydrogens() if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def explicify_hydrogens(self: 'ReactionContainer') -> int: @@ -203,7 +203,7 @@ def explicify_hydrogens(self: 'ReactionContainer') -> int: m.remap(remap) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def remove_reagents(self, *, keep_reagents: bool = False, mapping: bool = True) -> bool: diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 43847d51..76e408ce 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -22,13 +22,13 @@ class Bond: __slots__ = ('_order', '_in_ring', '_stereo') - def __init__(self, order: int): + def __init__(self, order: int, *, stereo: Optional[bool] = None): if not isinstance(order, int): raise TypeError('invalid order value') elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') self._order = order - self._stereo = None + self._stereo = stereo def __eq__(self, other): if isinstance(other, int): diff --git a/chython/containers/graph.py b/chython/containers/graph.py index f644ebb6..51fb0412 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -165,6 +165,8 @@ def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): u = self.copy() if copy else self u._atoms.update(other._atoms) u._bonds.update(other._bonds) + if not copy: + self.flush_cache() return u def flush_cache(self): diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index 2d154924..ca13c5e5 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ 
-146,10 +146,11 @@ def compose(self) -> CGRContainer: p = MoleculeContainer() return r ^ p - def flush_cache(self, **kwargs): + def flush_cache(self, keep_molecule_cache=False, **kwargs): self.__dict__.clear() - for m in self.molecules(): - m.flush_cache(**kwargs) + if not keep_molecule_cache: + for m in self.molecules(): + m.flush_cache(**kwargs) def pack(self, *, compressed=True, check=True) -> bytes: """ From 057d615e2b967fa941ba0352ef3a4f4a48c8acdc Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 22:30:13 +0100 Subject: [PATCH 42/68] saved --- chython/algorithms/isomorphism.py | 6 +- chython/reactor/base.py | 281 ++++++++++++++---------------- chython/reactor/transformer.py | 3 +- 3 files changed, 135 insertions(+), 155 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 23257e18..dc062591 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -295,10 +295,10 @@ def get_mapping(query, scope): array('I', [n in scope for n in other])) else: components = get_mapping = None + yield from self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, + components=components, get_mapping=get_mapping) + return # todo: implement stereo - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, - components=components, get_mapping=get_mapping) - atoms_stereo = self._atoms_stereo allenes_stereo = self._allenes_stereo cis_trans_stereo = self._cis_trans_stereo diff --git a/chython/reactor/base.py b/chython/reactor/base.py index fae981bc..ca128cbf 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -19,189 +19,170 @@ # from collections import defaultdict from itertools import product +from typing import Union from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement, 
QueryElement +from ..periodictable import Element, ListElement, AnyElement, QueryElement, AnyMetal class BaseReactor: - def __init__(self, reactants, products, delete_atoms, fix_rings, fix_tautomers): - self.__to_delete = reactants.difference(products) if delete_atoms else () - - # prepare atoms patch - self.__elements = elements = {} - self.__hydrogens = hydrogens = {} - self.__variable = variable = [] - - atoms = defaultdict(dict) - if isinstance(products, MoleculeContainer): - # full replacement of atoms - for n, atom in products.atoms(): - elements[n] = atom.copy(hydrogens=True, stereo=True) - for n, atom in products.atoms(): - atoms[n].update(charge=atom.charge, is_radical=atom.is_radical) - if atom.atomic_number: # replace atom - elements[n] = Element.from_atomic_number(atom.atomic_number)(atom.isotope) - if n not in reactants and isinstance(products, MoleculeContainer): - atoms[n]['xy'] = atom.xy - if atom.implicit_hydrogens is not None: - hydrogens[n] = atom.implicit_hydrogens # save available H count - elif n not in reactants: - if not isinstance(atom, ListElement): - raise ValueError('New atom should be defined') - elements[n] = [Element.from_symbol(x)() for x in atom._elements] - variable.append(n) - else: # use atom from reactant - if not isinstance(atom, AnyElement): - raise ValueError('Only AnyElement can be used for matched atom propagation') - elements[n] = None - - if isinstance(products, QueryContainer): - bonds = [] - for n, m, b in products.bonds(): + def __init__(self, pattern, replacement, delete_atoms, fix_rings, fix_tautomers): + if isinstance(replacement, QueryContainer): + for n, a in replacement.atoms(): + if not isinstance(a, (AnyElement, QueryElement)): + raise TypeError('Unsupported query atom type') + for *_, b in replacement.bonds(): if len(b.order) > 1: - raise ValueError('bond list in patch not supported') - else: - bonds.append((n, m, Bond(b.order[0]))) - else: - bonds = [(n, m, b.copy()) for n, m, b in products.bonds()] + raise 
ValueError('Variable bond in replacement') - self.__bonds = bonds - self.__atom_attrs = dict(atoms) - self.__products = products - self.__fix_rings = fix_rings - self.__fix_tautomers = fix_tautomers + self._to_delete = {n for n, a in pattern.atoms() if not a.masked} - set(replacement) if delete_atoms else () + self._replacement = replacement + self._fix_rings = fix_rings + self._fix_tautomers = fix_tautomers def _patcher(self, structure: MoleculeContainer, mapping): - elements = self.__elements - variable = self.__variable - - new = self.__prepare_skeleton(structure, mapping) - self.__set_stereo(new, structure, mapping) + new = self._prepare_skeleton(structure, mapping) + self._fix_stereo(new, structure, mapping) - if not variable: - if self.__fix_rings: - new.kekule() # keeps stereo as is - if not new.thiele(fix_tautomers=self.__fix_tautomers): # fixes stereo if any ring aromatized - new.fix_stereo() - else: + if self._fix_rings: + new.kekule() # keeps stereo as is + if not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized new.fix_stereo() - yield new else: - copy = new.copy() - if self.__fix_rings: - copy.kekule() - if not copy.thiele(fix_tautomers=self.__fix_tautomers): - copy.fix_stereo() - else: - copy.fix_stereo() - yield copy + new.fix_stereo() + yield new + + def _get_deleted(self, structure, mapping): + if not self._to_delete: + return set() - for atoms in product(*(elements[x][1:] for x in variable)): - copy = new.copy() - for n, atom in zip(variable, atoms): - n = mapping[n] - # replace atom - copy._atoms[n] = a = atom.copy() # noqa - a._attach_graph(copy, n) # noqa - copy.calc_implicit(n) # noqa - if self.__fix_rings: - copy.kekule() - if not copy.thiele(fix_tautomers=self.__fix_tautomers): - copy.fix_stereo() - else: - copy.fix_stereo() + bonds = structure._bonds + to_delete = {mapping[x] for x in self._to_delete} + # if deleted atoms have another path to remain fragment, the path is preserved + remain = 
set(mapping.values()).difference(to_delete) + delete, global_seen = set(), set() + for x in to_delete: + for n in bonds[x]: + if n in global_seen or n in remain: + continue + seen = {n} + global_seen.add(n) + stack = [x for x in bonds[n] if x not in global_seen] + while stack: + current = stack.pop() + if current in remain: + break + if current in to_delete: + continue + seen.add(current) + global_seen.add(current) + stack.extend([x for x in bonds[current] if x not in global_seen]) else: - copy.fix_stereo() - yield copy + delete.update(seen) - def __prepare_skeleton(self, structure, mapping): - elements = self.__elements - patch_hydrogens = self.__hydrogens - patch_bonds = self.__bonds - variable = self.__variable + to_delete.update(delete) + return to_delete + def _prepare_skeleton(self, structure, mapping): atoms = structure._atoms - plane = structure._plane bonds = structure._bonds - charges = structure._charges - radicals = structure._radicals - hydrogens = structure._hydrogens - - to_delete = {mapping[x] for x in self.__to_delete} - if to_delete: - # if deleted atoms have another path to remain fragment, the path is preserved - remain = set(mapping.values()).difference(to_delete) - delete, global_seen = set(), set() - for x in to_delete: - for n in bonds[x]: - if n in global_seen or n in remain: - continue - seen = {n} - global_seen.add(n) - stack = [x for x in bonds[n] if x not in global_seen] - while stack: - current = stack.pop() - if current in remain: - break - if current in to_delete: - continue - seen.add(current) - global_seen.add(current) - stack.extend([x for x in bonds[current] if x not in global_seen]) - else: - delete.update(seen) - - to_delete.update(delete) + to_delete = self._get_deleted(structure, mapping) new = structure.__class__() - keep_hydrogens = {} + natoms = new._atoms + nbonds = new._bonds max_atom = max(atoms) - for n, atom in self.__atom_attrs.items(): - if n in mapping: # add matched atoms - m = mapping[n] - e = elements[n] - if e 
is None: - e = atoms[m] - new.add_atom(e.copy(), m, xy=plane[m], _skip_hydrogen_calculation=True, **atom) - else: # new atoms - max_atom += 1 - if n in variable: - # use first from the list - mapping[n] = new.add_atom(elements[n][0].copy(), max_atom, _skip_hydrogen_calculation=True, **atom) + stereo_atoms = [] + stereo_bonds = [] + + for n, a in self._replacement.atoms(): + if isinstance(a, AnyElement): + if n := mapping.get(n): + # keep matched atom type and isotope + e = atoms[n].copy(stereo=True) + e.charge = a.charge + e.is_radical = a.is_radical + if a.stereo is not None: # override stereo + e._stereo = a.stereo + elif e.stereo is not None: # keep original stereo + stereo_atoms.append(n) # mark for stereo fix + natoms[n] = e + nbonds[n] = {} + else: + raise ValueError("AnyElement doesn't match to pattern") + else: # QueryElement or Element + a: Union[QueryElement, Element] # typehint + e = Element.from_atomic_number(a.atomic_number) + e = e(a.isotope, charge=a.charge, is_radical=a.is_radical, stereo=a.stereo) + if not (m := mapping.get(n)): # new atom + m = max_atom + 1 + max_atom += 1 + mapping[n] = m + if isinstance(a, Element): + e._implicit_hydrogens = a.implicit_hydrogens # keep H count from patch + e.x = a.x # keep coordinates from patch + e.y = a.y + elif len(a.implicit_hydrogens) == 1: + e._implicit_hydrogens = a.implicit_hydrogens[0] + elif a.implicit_hydrogens: + raise ValueError('Query element in patch has more than one implicit hydrogen') + else: # existing atoms + b = atoms[m] + e.x = b.x # preserve existing coordinates + e.y = b.y + if a.stereo is None and b.stereo is not None: # keep original stereo + e._stereo = b.stereo + stereo_atoms.append(m) + natoms[m] = e + nbonds[m] = {} + + # preserve connectivity order + for n, bs in self._replacement._bonds.items(): + n = mapping[n] + for m, b in bs.items(): + m = mapping[m] + if n in nbonds[m]: + nbonds[n][m] = nbonds[m][n] else: - mapping[n] = new.add_atom(elements[n].copy(), max_atom, 
_skip_hydrogen_calculation=True, **atom) - if n in patch_hydrogens: # keep patch aromatic atoms hydrogens count - keep_hydrogens[max_atom] = patch_hydrogens[n] + nbonds[n][m] = b = Bond(int(b), stereo=b.stereo) + if b.stereo is None: + if not (nb := bonds.get(n)): + continue + if not (mb := nb.get(m)): + continue + if mb.stereo is None: + continue + # original structure has stereo bond + b._stereo = mb.stereo + stereo_bonds.append((n, m)) patch_atoms = set(new) # don't move! - for n, atom in structure.atoms(): # add unmatched atoms + for n, a in atoms.items(): # add unmatched or masked atoms if n not in patch_atoms and n not in to_delete: - new.add_atom(atom.copy(), n, charge=charges[n], is_radical=radicals[n], xy=plane[n], - _skip_hydrogen_calculation=True) - keep_hydrogens[n] = hydrogens[n] # keep hydrogens on unmatched atoms as is. - - for n, m, bond in patch_bonds: # add patch bonds - new.add_bond(mapping[n], mapping[m], bond.copy(), _skip_hydrogen_calculation=True) + natoms[n] = a.copy(hydrogens=True, stereo=True) + nbonds[n] = {} - for n, m_bond in bonds.items(): + for n, bs in bonds.items(): if n in to_delete: # atoms for removing continue - to_delete.add(n) # reuse to_delete set for seen atoms - for m, bond in m_bond.items(): + for m, b in bs.items(): # ignore deleted atoms and patch atoms if m in to_delete or n in patch_atoms and m in patch_atoms: continue - new.add_bond(n, m, bond.copy(), _skip_hydrogen_calculation=True) - - # fix hydrogens count. 
- new._hydrogens.update(keep_hydrogens) # noqa - for n in new: - if n not in keep_hydrogens: - new.calc_implicit(n) # noqa + elif n in nbonds[m]: + nbonds[n][m] = nbonds[m][n] + else: + nbonds[n][m] = b.copy(stereo=True) + if b.stereo is not None and (n in patch_atoms or m in patch_atoms): + stereo_bonds.append((n, m)) + + for n, a in new.atoms(): + if a.implicit_hydrogens is None: + new.calc_implicit(n) + new.calc_labels() return new - def __set_stereo(self, new, structure, mapping): + def _fix_stereo(self, new, structure, mapping): products = self.__products stereo_override = set() r_mapping = {m: n for n, m in mapping.items()} diff --git a/chython/reactor/transformer.py b/chython/reactor/transformer.py index d2be81e7..5852cc47 100644 --- a/chython/reactor/transformer.py +++ b/chython/reactor/transformer.py @@ -47,8 +47,7 @@ def __init__(self, pattern: QueryContainer, replacement: Union[MoleculeContainer self.replacement = replacement self.__automorphism_filter = automorphism_filter self.__copy_metadata = copy_metadata - super().__init__({n for n, h in pattern._masked.items() if not h}, replacement, delete_atoms, - fix_aromatic_rings, fix_tautomers) + super().__init__(pattern, replacement, delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, structure: MoleculeContainer): if not isinstance(structure, MoleculeContainer): From f294c3c44f644eab3c0db730374df5656ee3a923 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 17:52:47 +0100 Subject: [PATCH 43/68] Refactor: Update mdl module import paths Renamed '_mdl' directory to 'mdl' and updated all corresponding import statements. This change improves code readability and aligns directory naming conventions across the project. 
--- chython/files/MRVrw.py | 2 +- chython/files/RDFrw.py | 4 ++-- chython/files/SDFrw.py | 2 +- chython/files/{_mdl => mdl}/__init__.py | 0 chython/files/{_mdl => mdl}/emol.py | 0 chython/files/{_mdl => mdl}/erxn.py | 0 chython/files/{_mdl => mdl}/mol.py | 0 chython/files/{_mdl => mdl}/read.py | 0 chython/files/{_mdl => mdl}/rxn.py | 0 chython/files/{_mdl => mdl}/stereo.py | 0 chython/files/{_mdl => mdl}/write.py | 0 11 files changed, 4 insertions(+), 4 deletions(-) rename chython/files/{_mdl => mdl}/__init__.py (100%) rename chython/files/{_mdl => mdl}/emol.py (100%) rename chython/files/{_mdl => mdl}/erxn.py (100%) rename chython/files/{_mdl => mdl}/mol.py (100%) rename chython/files/{_mdl => mdl}/read.py (100%) rename chython/files/{_mdl => mdl}/rxn.py (100%) rename chython/files/{_mdl => mdl}/stereo.py (100%) rename chython/files/{_mdl => mdl}/write.py (100%) diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index ab969b21..543f33dd 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -24,7 +24,7 @@ from typing import Union, List, Iterator, Dict, Optional from ._convert import create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction -from ._mdl import postprocess_molecule +from .mdl import postprocess_molecule from ..containers import MoleculeContainer, ReactionContainer from ..exceptions import EmptyMolecule, EmptyReaction diff --git a/chython/files/RDFrw.py b/chython/files/RDFrw.py index 62bebbae..9e8a20f2 100644 --- a/chython/files/RDFrw.py +++ b/chython/files/RDFrw.py @@ -25,8 +25,8 @@ from sys import platform from time import strftime from typing import Union, Dict, List -from ._mdl import (MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, parse_rxn_v2000, parse_rxn_v3000, - postprocess_molecule) +from .mdl import (MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, parse_rxn_v2000, parse_rxn_v3000, + postprocess_molecule) from ._convert import 
create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction from ..containers import ReactionContainer, MoleculeContainer diff --git a/chython/files/SDFrw.py b/chython/files/SDFrw.py index 04edb0ad..232f3fe6 100644 --- a/chython/files/SDFrw.py +++ b/chython/files/SDFrw.py @@ -23,7 +23,7 @@ from subprocess import check_output from sys import platform from typing import Optional, List -from ._mdl import MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, postprocess_molecule +from .mdl import MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, postprocess_molecule from ._convert import create_molecule from ._mapping import postprocess_parsed_molecule from ..containers import MoleculeContainer diff --git a/chython/files/_mdl/__init__.py b/chython/files/mdl/__init__.py similarity index 100% rename from chython/files/_mdl/__init__.py rename to chython/files/mdl/__init__.py diff --git a/chython/files/_mdl/emol.py b/chython/files/mdl/emol.py similarity index 100% rename from chython/files/_mdl/emol.py rename to chython/files/mdl/emol.py diff --git a/chython/files/_mdl/erxn.py b/chython/files/mdl/erxn.py similarity index 100% rename from chython/files/_mdl/erxn.py rename to chython/files/mdl/erxn.py diff --git a/chython/files/_mdl/mol.py b/chython/files/mdl/mol.py similarity index 100% rename from chython/files/_mdl/mol.py rename to chython/files/mdl/mol.py diff --git a/chython/files/_mdl/read.py b/chython/files/mdl/read.py similarity index 100% rename from chython/files/_mdl/read.py rename to chython/files/mdl/read.py diff --git a/chython/files/_mdl/rxn.py b/chython/files/mdl/rxn.py similarity index 100% rename from chython/files/_mdl/rxn.py rename to chython/files/mdl/rxn.py diff --git a/chython/files/_mdl/stereo.py b/chython/files/mdl/stereo.py similarity index 100% rename from chython/files/_mdl/stereo.py rename to chython/files/mdl/stereo.py diff --git a/chython/files/_mdl/write.py 
b/chython/files/mdl/write.py similarity index 100% rename from chython/files/_mdl/write.py rename to chython/files/mdl/write.py From df0b08c3baaebe2399d4a2635eded5c9813d38d2 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 17:55:22 +0100 Subject: [PATCH 44/68] Fixed stereo parsing bug --- chython/files/daylight/parser.py | 6 +++++- chython/files/daylight/smiles.py | 6 ++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index f685a359..42d1583b 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -37,6 +37,7 @@ def parser(tokens, strong_cycle): last_num = 0 stack = [] cycles = {} + stereo_atoms = {} stereo_bonds = defaultdict(dict) previous = None @@ -135,6 +136,8 @@ def parser(tokens, strong_cycle): # else bt == 4 - skip dot previous = None + if 'stereo' in token: + stereo_atoms[atom_num] = token.pop('stereo') atoms.append(token) atoms_types.append(token_type) last_num = atom_num @@ -147,7 +150,8 @@ def parser(tokens, strong_cycle): elif previous: raise IncorrectSmiles('bond on the end') - return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log} + return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_atoms': stereo_atoms, + 'stereo_bonds': stereo_bonds, 'log': log} __all__ = ['parser'] diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 442195f8..60630ba0 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -170,9 +170,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo: return - - stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a.get('stereo')) is not None] - if not stereo_atoms and not data['stereo_bonds']: + elif not data['stereo_atoms'] or not data['stereo_bonds']: return atoms = molecule._atoms @@ -185,7 +183,7 @@ def postprocess_molecule(molecule, 
data, *, ignore_stereo=False): log = [] stereo = [] - for i, s in stereo_atoms: + for i, s in data['stereo_atoms'].items(): n = mapping[i] if not i and atoms[n].implicit_hydrogens: # first atom in smiles has reversed chiral mark s = not s From 3e1799e28ecace6e12f9771e96cad5a580f362e1 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 17:59:35 +0100 Subject: [PATCH 45/68] Reactors refactoring started --- chython/reactor/base.py | 204 +++++++++++++-------------------- chython/reactor/transformer.py | 17 ++- 2 files changed, 86 insertions(+), 135 deletions(-) diff --git a/chython/reactor/base.py b/chython/reactor/base.py index ca128cbf..acfe4cc0 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -17,12 +17,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from collections import defaultdict -from itertools import product from typing import Union from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement, QueryElement, AnyMetal +from ..periodictable import Element, AnyElement, QueryElement class BaseReactor: @@ -40,18 +38,6 @@ def __init__(self, pattern, replacement, delete_atoms, fix_rings, fix_tautomers) self._fix_rings = fix_rings self._fix_tautomers = fix_tautomers - def _patcher(self, structure: MoleculeContainer, mapping): - new = self._prepare_skeleton(structure, mapping) - self._fix_stereo(new, structure, mapping) - - if self._fix_rings: - new.kekule() # keeps stereo as is - if not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized - new.fix_stereo() - else: - new.fix_stereo() - yield new - def _get_deleted(self, structure, mapping): if not self._to_delete: return set() @@ -83,7 +69,7 @@ def _get_deleted(self, structure, mapping): to_delete.update(delete) return to_delete - def _prepare_skeleton(self, structure, mapping): 
+ def _patcher(self, structure: MoleculeContainer, mapping): atoms = structure._atoms bonds = structure._bonds @@ -95,156 +81,122 @@ def _prepare_skeleton(self, structure, mapping): stereo_atoms = [] stereo_bonds = [] - for n, a in self._replacement.atoms(): - if isinstance(a, AnyElement): - if n := mapping.get(n): + for n, ra in self._replacement.atoms(): + if isinstance(ra, AnyElement): + if m := mapping.get(n): # keep matched atom type and isotope - e = atoms[n].copy(stereo=True) - e.charge = a.charge - e.is_radical = a.is_radical - if a.stereo is not None: # override stereo - e._stereo = a.stereo - elif e.stereo is not None: # keep original stereo - stereo_atoms.append(n) # mark for stereo fix - natoms[n] = e - nbonds[n] = {} + sa = atoms[m] + a = sa.copy() + a.charge = ra.charge + a.is_radical = ra.is_radical + if ra.stereo is not None: # override stereo + a._stereo = ra.stereo + elif sa.stereo is not None: # keep original stereo + stereo_atoms.append(m) # mark for stereo fix else: raise ValueError("AnyElement doesn't match to pattern") else: # QueryElement or Element - a: Union[QueryElement, Element] # typehint - e = Element.from_atomic_number(a.atomic_number) - e = e(a.isotope, charge=a.charge, is_radical=a.is_radical, stereo=a.stereo) + ra: Union[QueryElement, Element] # typehint + e = Element.from_atomic_number(ra.atomic_number) + a = e(ra.isotope, charge=ra.charge, is_radical=ra.is_radical) if not (m := mapping.get(n)): # new atom m = max_atom + 1 max_atom += 1 mapping[n] = m - if isinstance(a, Element): - e._implicit_hydrogens = a.implicit_hydrogens # keep H count from patch - e.x = a.x # keep coordinates from patch - e.y = a.y - elif len(a.implicit_hydrogens) == 1: - e._implicit_hydrogens = a.implicit_hydrogens[0] - elif a.implicit_hydrogens: + a._stereo = ra.stereo # keep stereo from patch for new atoms + if isinstance(ra, Element): + a._implicit_hydrogens = ra.implicit_hydrogens # keep H count from patch + a.x = ra.x # keep coordinates from patch + 
a.y = ra.y + elif len(ra.implicit_hydrogens) == 1: # keep H count from patch + a._implicit_hydrogens = ra.implicit_hydrogens[0] + elif ra.implicit_hydrogens: raise ValueError('Query element in patch has more than one implicit hydrogen') else: # existing atoms - b = atoms[m] - e.x = b.x # preserve existing coordinates - e.y = b.y - if a.stereo is None and b.stereo is not None: # keep original stereo - e._stereo = b.stereo + sa = atoms[m] + a.x = sa.x # preserve existing coordinates + a.y = sa.y + if ra.stereo is not None: + a._stereo = ra.stereo + elif sa.stereo is not None: # keep original stereo stereo_atoms.append(m) - natoms[m] = e - nbonds[m] = {} + natoms[m] = a + nbonds[m] = {} # preserve connectivity order for n, bs in self._replacement._bonds.items(): n = mapping[n] - for m, b in bs.items(): + for m, rb in bs.items(): m = mapping[m] - if n in nbonds[m]: + if n in nbonds[m]: # back-link nbonds[n][m] = nbonds[m][n] else: - nbonds[n][m] = b = Bond(int(b), stereo=b.stereo) - if b.stereo is None: - if not (nb := bonds.get(n)): - continue - if not (mb := nb.get(m)): - continue - if mb.stereo is None: - continue - # original structure has stereo bond - b._stereo = mb.stereo + nbonds[n][m] = b = Bond(int(rb)) + if rb.stereo is not None: # override stereo + b._stereo = rb.stereo + elif (sbn := bonds.get(n)) is None or (sb := sbn.get(m)) is None or sb.stereo is None: + continue + else: # original structure has stereo bond stereo_bonds.append((n, m)) - patch_atoms = set(new) # don't move! 
+ patched_atoms = set(new) for n, a in atoms.items(): # add unmatched or masked atoms - if n not in patch_atoms and n not in to_delete: + if n not in patched_atoms and n not in to_delete: natoms[n] = a.copy(hydrogens=True, stereo=True) nbonds[n] = {} - for n, bs in bonds.items(): + for n, bs in bonds.items(): # preserve connectivity order for keeping stereo labels as is if n in to_delete: # atoms for removing continue for m, b in bs.items(): # ignore deleted atoms and patch atoms - if m in to_delete or n in patch_atoms and m in patch_atoms: + if m in to_delete or n in patched_atoms and m in patched_atoms: continue - elif n in nbonds[m]: + elif n in nbonds[m]: # back-link nbonds[n][m] = nbonds[m][n] + elif b.stereo is not None and (n in patched_atoms or m in patched_atoms): + # unmatched/masked atoms to patched atoms linker bonds + # stereo label should be recalculated + nbonds[n][m] = b.copy() + stereo_bonds.append((n, m)) else: nbonds[n][m] = b.copy(stereo=True) - if b.stereo is not None and (n in patch_atoms or m in patch_atoms): - stereo_bonds.append((n, m)) for n, a in new.atoms(): if a.implicit_hydrogens is None: new.calc_implicit(n) new.calc_labels() - return new - def _fix_stereo(self, new, structure, mapping): - products = self.__products - stereo_override = set() - r_mapping = {m: n for n, m in mapping.items()} - - # set patch atoms stereo - for n, s in products._atoms_stereo.items(): - m = mapping[n] - new._atoms_stereo[m] = products._translate_tetrahedron_sign(n, [r_mapping[x] for x in - new.stereogenic_tetrahedrons[m]], s) - stereo_override.add(m) - - for n, s in products._allenes_stereo.items(): - m = mapping[n] - t1, t2, *_ = new.stereogenic_allenes[m] - new._allenes_stereo[m] = products._translate_allene_sign(n, r_mapping[t1], r_mapping[t2], s) - stereo_override.add(m) - - for (n, m), s in products._cis_trans_stereo.items(): - nm = (mapping[n], mapping[m]) - try: - t1, t2, *_ = new.stereogenic_cis_trans[nm] - except KeyError: - nm = nm[::-1] - t2, 
t1, *_ = new.stereogenic_cis_trans[nm] - new._cis_trans_stereo[nm] = products._translate_cis_trans_sign(n, m, r_mapping[t1], r_mapping[t2], s) - stereo_override.update(nm) - - # set unmatched part stereo and not overridden by patch. - for n, s in structure._atoms_stereo.items(): - if n in stereo_override or n not in new.stereogenic_tetrahedrons or \ - new._bonds[n].keys() != structure._bonds[n].keys(): - # skip atoms with changed neighbors - continue - new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new.stereogenic_tetrahedrons[n], s) - - for n, s in structure._allenes_stereo.items(): - if n in stereo_override or n not in new.stereogenic_allenes or \ - set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): - # skip changed allenes - continue - t1, t2, *_ = new.stereogenic_allenes[n] - new._allenes_stereo[n] = structure._translate_allene_sign(n, t1, t2, s) + # translate stereo sign from old order to new order + for n in stereo_atoms: + if n in new.stereogenic_tetrahedrons: + if bonds[n].keys() != nbonds[n].keys(): + # flush stereo from reaction center. should be explicitly set in replacement. 
+ continue + s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) + natoms[n]._stereo = s + elif n in new.stereogenic_allenes: + if set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): + # flush stereo for changed allene substituents + continue + s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) + natoms[n]._stereo = s + # else: ignore label - for nm, s in structure._cis_trans_stereo.items(): - n, m = nm - if n in stereo_override or m in stereo_override: - continue - env = structure.stereogenic_cis_trans[nm] - try: - new_env = new.stereogenic_cis_trans[nm] - except KeyError: - nm = nm[::-1] - try: - new_env = new.stereogenic_cis_trans[nm] - except KeyError: + for n, m in stereo_bonds: + if (t12 := new._stereo_cis_trans_terminals.get(n, True)) == new._stereo_cis_trans_terminals.get(m, False): + if set(new.stereogenic_cis_trans[t12]) != set(structure.stereogenic_cis_trans[t12]): continue - t2, t1, *_ = new_env - else: - t1, t2, *_ = new_env - if set(env) != set(new_env): - continue - new._cis_trans_stereo[nm] = structure._translate_cis_trans_sign(n, m, t1, t2, s) + new._translate_cis_trans_sign(*t12, *structure.stereogenic_cis_trans[t12][:2], bonds[n][m].stereo) + # else: ignore label + + if self._fix_rings: + new.kekule() # keeps stereo as is + if not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized + new.fix_stereo() + else: + new.fix_stereo() + return new __all__ = ['BaseReactor'] diff --git a/chython/reactor/transformer.py b/chython/reactor/transformer.py index 5852cc47..1ca11099 100644 --- a/chython/reactor/transformer.py +++ b/chython/reactor/transformer.py @@ -43,21 +43,20 @@ def __init__(self, pattern: QueryContainer, replacement: Union[MoleculeContainer if not isinstance(pattern, QueryContainer) or not isinstance(replacement, (MoleculeContainer, QueryContainer)): raise TypeError('invalid params') - self.pattern = pattern 
- self.replacement = replacement - self.__automorphism_filter = automorphism_filter - self.__copy_metadata = copy_metadata + self._pattern = pattern + self._automorphism_filter = automorphism_filter + self._copy_metadata = copy_metadata super().__init__(pattern, replacement, delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, structure: MoleculeContainer): if not isinstance(structure, MoleculeContainer): raise TypeError('only Molecules possible') - for mapping in self.pattern.get_mapping(structure, automorphism_filter=self.__automorphism_filter): - for transformed in self._patcher(structure, mapping): - if self.__copy_metadata: - transformed.meta.update(structure.meta) - yield transformed + for mapping in self._pattern.get_mapping(structure, automorphism_filter=self._automorphism_filter): + transformed = self._patcher(structure, mapping) + if self._copy_metadata: + transformed.meta.update(structure.meta) + yield transformed __all__ = ['Transformer'] From 907ed2cac0dacb79f8c155b405b121e031358f8e Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 18:47:50 +0100 Subject: [PATCH 46/68] fixes --- chython/files/daylight/parser.py | 2 +- chython/files/daylight/smiles.py | 2 +- chython/reactor/base.py | 44 ++++++++++++++++++-------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index 42d1583b..f45d020c 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -136,7 +136,7 @@ def parser(tokens, strong_cycle): # else bt == 4 - skip dot previous = None - if 'stereo' in token: + if token.get('stereo') is not None: stereo_atoms[atom_num] = token.pop('stereo') atoms.append(token) atoms_types.append(token_type) diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 60630ba0..293597ac 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -170,7 +170,7 @@ def 
postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo: return - elif not data['stereo_atoms'] or not data['stereo_bonds']: + elif not data['stereo_atoms'] and not data['stereo_bonds']: return atoms = molecule._atoms diff --git a/chython/reactor/base.py b/chython/reactor/base.py index acfe4cc0..ca39685a 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -81,6 +81,8 @@ def _patcher(self, structure: MoleculeContainer, mapping): stereo_atoms = [] stereo_bonds = [] + # let's preserve connectivity order from replacement to keep stereo signs as is. + # stereo labels from original structure will be recalculated after full molecule construction. for n, ra in self._replacement.atoms(): if isinstance(ra, AnyElement): if m := mapping.get(n): @@ -140,10 +142,17 @@ def _patcher(self, structure: MoleculeContainer, mapping): stereo_bonds.append((n, m)) patched_atoms = set(new) - for n, a in atoms.items(): # add unmatched or masked atoms + for n, sa in atoms.items(): # add unmatched or masked atoms if n not in patched_atoms and n not in to_delete: - natoms[n] = a.copy(hydrogens=True, stereo=True) + natoms[n] = a = sa.copy(hydrogens=True) nbonds[n] = {} + if sa.stereo is not None: + # in case of allenes label can disappear/change, thus, requires recalculation + # for tetrahedrons label can be stored as is + if len(bonds[n]) >= 3: + a._stereo = sa.stereo + else: + stereo_atoms.append(n) for n, bs in bonds.items(): # preserve connectivity order for keeping stereo labels as is if n in to_delete: # atoms for removing @@ -154,13 +163,11 @@ def _patcher(self, structure: MoleculeContainer, mapping): continue elif n in nbonds[m]: # back-link nbonds[n][m] = nbonds[m][n] - elif b.stereo is not None and (n in patched_atoms or m in patched_atoms): - # unmatched/masked atoms to patched atoms linker bonds - # stereo label should be recalculated - nbonds[n][m] = b.copy() - stereo_bonds.append((n, m)) else: - nbonds[n][m] = b.copy(stereo=True) + 
nbonds[n][m] = b.copy() + if b.stereo is not None: + # stereo label should be recalculated + stereo_bonds.append((n, m)) for n, a in new.atoms(): if a.implicit_hydrogens is None: @@ -170,24 +177,23 @@ def _patcher(self, structure: MoleculeContainer, mapping): # translate stereo sign from old order to new order for n in stereo_atoms: if n in new.stereogenic_tetrahedrons: - if bonds[n].keys() != nbonds[n].keys(): + if bonds[n].keys() == nbonds[n].keys(): # flush stereo from reaction center. should be explicitly set in replacement. - continue - s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) - natoms[n]._stereo = s + s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) + natoms[n]._stereo = s elif n in new.stereogenic_allenes: - if set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): + if set(new.stereogenic_allenes[n]) == set(structure.stereogenic_allenes[n]): # flush stereo for changed allene substituents - continue - s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) - natoms[n]._stereo = s + s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) + natoms[n]._stereo = s # else: ignore label for n, m in stereo_bonds: if (t12 := new._stereo_cis_trans_terminals.get(n, True)) == new._stereo_cis_trans_terminals.get(m, False): - if set(new.stereogenic_cis_trans[t12]) != set(structure.stereogenic_cis_trans[t12]): - continue - new._translate_cis_trans_sign(*t12, *structure.stereogenic_cis_trans[t12][:2], bonds[n][m].stereo) + if set(new.stereogenic_cis_trans[t12]) == set(env := structure.stereogenic_cis_trans[t12]): + # connected to cumulenes atoms should be the same + s = new._translate_cis_trans_sign(*t12, *env[:2], bonds[n][m].stereo) + nbonds[n][m]._stereo = s # else: ignore label if self._fix_rings: From 8d3994eed0e186e88dfe4866a55cb2f7752843e4 Mon Sep 17 00:00:00 2001 From: Ramil 
Nugmanov Date: Wed, 11 Dec 2024 09:22:44 +0100 Subject: [PATCH 47/68] WIP: pach support fixes --- chython/__init__.py | 4 +- chython/algorithms/stereo.py | 4 ++ chython/containers/__init__.py | 7 ++- chython/containers/_pack.pyx | 107 +++++++++++++++++---------------- 4 files changed, 65 insertions(+), 57 deletions(-) diff --git a/chython/__init__.py b/chython/__init__.py index 0c860191..b695b7b2 100644 --- a/chython/__init__.py +++ b/chython/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2014-2019 Timur Madzhidov tmadzhidov@gmail.com features and API discussion # Copyright 2014-2019 Alexandre Varnek base idea of CGR approach # This file is part of chython. @@ -25,7 +25,7 @@ from .utils import * -pickle_cache = False # store cached attributes in pickle torch_device = 'cpu' # AAM model device. Change before first `reset_mapping` call! + __all__ = [] diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index fd19fa75..80f87049 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -630,6 +630,10 @@ def fix_stereo(self: 'MoleculeContainer'): old_stereo = fail_stereo self.flush_stereo_cache() + @cached_property + def _cis_trans_count(self) -> int: + return sum(b.stereo is not None for *_, b in self.bonds()) + @cached_property def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: """ diff --git a/chython/containers/__init__.py b/chython/containers/__init__.py index 6658eeaa..0f2f3dbb 100644 --- a/chython/containers/__init__.py +++ b/chython/containers/__init__.py @@ -36,7 +36,8 @@ def unpach(data: bytes, /, *, compressed=True) -> Union[MoleculeContainer, React return ReactionContainer.unpack(data, compressed=False) +unpack = unpach + + __all__ = [x for x in locals() if x.endswith('Container')] -__all__.append('Bond') -__all__.append('QueryBond') -__all__.append('unpach') +__all__.extend(['Bond', 'QueryBond', 'unpack', 
'unpach']) diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack.pyx index fa61afc0..fe024654 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -53,35 +53,26 @@ from libc.math cimport ldexp, frexp @cython.cdivision(True) @cython.wraparound(False) def pack(object molecule): - cdef bint b # binary flag + cdef bint b = True # binary flag cdef char charge - cdef unsigned char atomic_number, isotope, bond, s = 0, buffer_b, buffer_o - cdef unsigned char *p, *data + cdef unsigned char atomic_number, ngb_count, isotope, bond, s = 0, buffer_b, buffer_o, stereo, hcr + cdef unsigned char *data cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift # can be > 2^16 - cdef unsigned char[4096] stereo, hcr, seen - cdef unsigned int[4096] xy # 2 * 16bit + cdef unsigned char[4096] seen cdef bytes py_pack - cdef dict py_ngb, py_atoms, py_bonds, py_charges, py_radicals, py_hydrogens, py_plane - cdef dict py_cis_trans_stereo, py_atoms_stereo, py_allenes_stereo + cdef dict py_ngb, py_atoms, py_bonds cdef tuple py_tuple cdef object py_atom, py_bond, py_nan_int, py_obj # map molecule to vars py_atoms = molecule._atoms py_bonds = molecule._bonds - py_charges = molecule._charges - py_radicals = molecule._radicals - py_hydrogens = molecule._hydrogens - py_cis_trans_stereo = molecule._cis_trans_stereo - py_atoms_stereo = molecule._atoms_stereo - py_allenes_stereo = molecule._allenes_stereo - py_plane = molecule._plane # calculate elements count atoms_count = len(py_atoms) - cis_trans_count = len(py_cis_trans_stereo) + cis_trans_count = molecule._cis_trans_count for py_ngb in py_bonds.values(): bonds_count += len(py_ngb) @@ -103,64 +94,76 @@ 
def pack(object molecule): if not data: raise MemoryError() - # precalculate atom attrs - # should be done independently, due to possible randomness in dicts order. - # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical - for n, py_nan_int in py_hydrogens.items(): - if py_nan_int is None: - hcr[n] = 0xe0 # 0b11100000 - else: - hcr[n] = py_nan_int << 5 - for n, charge in py_charges.items(): - hcr[n] |= (charge + 4) << 1 - for n, b in py_radicals.items(): - if b: # lazy memory access - hcr[n] |= 1 - - # 2 float16 big endian - for n, py_tuple in py_plane.items(): - p = &xy[n] - double_to_float16(py_tuple[0], &p[0]) - double_to_float16(py_tuple[1], &p[2]) - - # erase random data - seen[n] = 0 - stereo[n] = 0 - - # 2 bit tetrahedron | 2 bit allene | 0000 - for n, b in py_atoms_stereo.items(): - stereo[n] = 0xc0 if b else 0x80 - for n, b in py_allenes_stereo.items(): - stereo[n] = 0x30 if b else 0x20 - # start pack collection data[0] = 2 # header. specification version 2 data[1] = atoms_count >> 4 # 5-12b of atom count value data[2] = atoms_count << 4 | cis_trans_count >> 8 # 1-4b of atom count value, 9-12b of cis-trans count value data[3] = cis_trans_count # 1-8b of cis-trans count value - b = True # init connection table flag for py_obj, py_atom in py_atoms.items(): py_ngb = py_bonds[py_obj] + ngb_count = len(py_ngb) n = py_obj # cast to C seen[n] = 1 - p = &xy[n] # XY atomic_number = py_atom.atomic_number - py_nan_int = py_atom._Core__isotope # direct access + + py_nan_int = py_atom._isotope # direct access if py_nan_int is None: isotope = 0 else: isotope = py_nan_int - common_isotopes[atomic_number] + py_nan_int = py_atom._stereo + if py_nan_int is None: + stereo = 0 + # V2 specification + # 2 bit tetrahedron | 2 bit allene | 0000 + elif py_nan_int: + if ngb_count == 2: + stereo = 0x30 + else: + stereo = 0xc0 + else: + if ngb_count == 2: + stereo = 0x20 + else: + stereo = 0x80 + + # precalculate atom attrs + # should be done independently, due to possible 
randomness in dicts order. + # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical + py_nan_int = py_atom._implicit_hydrogens + if py_nan_int is None: + hcr = 0xe0 # 0b11100000 + else: + hcr = py_nan_int << 5 + + charge = py_atom._charge + hcr |= (charge + 4) << 1 + if py_atom._is_radical: + hcr |= 1 + data[atoms_shift] = n >> 4 # 5-12b AN - data[atoms_shift + 1] = n << 4 | len(py_ngb) # 1-4b AN, 4b NC - data[atoms_shift + 2] = stereo[n] | isotope >> 1 # TS , AS , 4b I + data[atoms_shift + 1] = n << 4 | ngb_count # 1-4b AN, 4b NC + data[atoms_shift + 2] = stereo | isotope >> 1 # TS , AS , 4b I data[atoms_shift + 3] = isotope << 7 | atomic_number # 1bI , A + + # 2 float16 big endian + for n, py_tuple in py_plane.items(): + p = &xy[n] + double_to_float16(py_tuple[0], &p[0]) + double_to_float16(py_tuple[1], &p[2]) + + # erase random data + seen[n] = 0 + stereo[n] = 0 + data[atoms_shift + 4] = p[0] data[atoms_shift + 5] = p[1] data[atoms_shift + 6] = p[2] data[atoms_shift + 7] = p[3] - data[atoms_shift + 8] = hcr[n] + + data[atoms_shift + 8] = hcr atoms_shift += 9 # collect connection table From 8e6b9a78fd0d38f0d3e93a5d5bee55a7e2cd3e2d Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 23 Dec 2024 14:53:58 +0100 Subject: [PATCH 48/68] packing reimplemented unpacking WIP --- chython/algorithms/stereo.py | 2 +- chython/containers/_pack.pyx | 69 +++++++----- chython/containers/_unpack.pyx | 194 ++++++++++++++------------------- chython/containers/molecule.py | 33 +----- 4 files changed, 126 insertions(+), 172 deletions(-) diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index 80f87049..6cd814a2 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -197,7 +197,7 @@ def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: terminals = [x for x, y in adj.items() if len(y) == 1] # list to keep atoms order! 
cumulenes = [] while terminals: - n = terminals.pop() + n = terminals.pop(0) m = adj[n].pop() path = [n, m] while m not in terminals: diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack.pyx index fe024654..37b42b34 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack.pyx @@ -20,7 +20,7 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp, frexp -# Format specification:: +# Format V2 specification:: # # Big endian bytes order # 8 bit - 0x02 (current format specification) @@ -48,6 +48,25 @@ from libc.math cimport ldexp, frexp # 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. # 1 bit - sign +# stereo block: +# 0000 - no stereo +# 0001 - not in use +# 0010 - allene +# 0011 - allene +# 0100 - not in use +# 0101 - not in use +# 0110 - not in use +# 0111 - not in use +# 1000 - tetrahedron +# 1001 - not in use +# 1010 - not in use +# 1011 - not in use +# 1100 - tetrahedron +# 1101 - not in use +# 1110 - not in use +# 1111 - not in use + + @cython.nonecheck(False) @cython.boundscheck(False) @cython.cdivision(True) @@ -57,18 +76,19 @@ def pack(object molecule): cdef char charge cdef unsigned char atomic_number, ngb_count, isotope, bond, s = 0, buffer_b, buffer_o, stereo, hcr cdef unsigned char *data - cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m + cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m, tn, tm cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift # can be > 2^16 - cdef unsigned char[4096] seen + cdef unsigned char[4096] seen # atom number is 12 bit, thus, can be any value up to 4095. 
numbers are not continuous cdef bytes py_pack - cdef dict py_ngb, py_atoms, py_bonds + cdef dict py_ngb, py_atoms, py_bonds, py_stereo cdef tuple py_tuple cdef object py_atom, py_bond, py_nan_int, py_obj # map molecule to vars py_atoms = molecule._atoms py_bonds = molecule._bonds + py_stereo = molecule._stereo_cis_trans_terminals # calculate elements count atoms_count = len(py_atoms) @@ -94,6 +114,8 @@ def pack(object molecule): if not data: raise MemoryError() + seen[:] = 0 # erase random data + # start pack collection data[0] = 2 # header. specification version 2 data[1] = atoms_count >> 4 # 5-12b of atom count value @@ -119,12 +141,12 @@ def pack(object molecule): # V2 specification # 2 bit tetrahedron | 2 bit allene | 0000 elif py_nan_int: - if ngb_count == 2: + if ngb_count == 2: # allene stereo = 0x30 else: stereo = 0xc0 else: - if ngb_count == 2: + if ngb_count == 2: # allene stereo = 0x20 else: stereo = 0x80 @@ -149,19 +171,8 @@ def pack(object molecule): data[atoms_shift + 3] = isotope << 7 | atomic_number # 1bI , A # 2 float16 big endian - for n, py_tuple in py_plane.items(): - p = &xy[n] - double_to_float16(py_tuple[0], &p[0]) - double_to_float16(py_tuple[1], &p[2]) - - # erase random data - seen[n] = 0 - stereo[n] = 0 - - data[atoms_shift + 4] = p[0] - data[atoms_shift + 5] = p[1] - data[atoms_shift + 6] = p[2] - data[atoms_shift + 7] = p[3] + double_to_float16(py_atom._x, &data[atoms_shift + 4]) + double_to_float16(py_atom._y, &data[atoms_shift + 6]) data[atoms_shift + 8] = hcr atoms_shift += 9 @@ -181,7 +192,7 @@ def pack(object molecule): b = True if not seen[m]: - bond = py_bond._Bond__order - 1 + bond = py_bond._order - 1 # 3 3 2 | 1 3 3 1 | 2 3 3 if s == 0: buffer_o = bond << 5 @@ -213,17 +224,19 @@ def pack(object molecule): order_shift += 1 s = 0 + py_nan_int = py_bond._stereo + if py_nan_int is not None: + py_tuple = py_stereo[py_obj] + tn, tm = py_tuple + data[cis_trans_shift] = tn >> 4 + data[cis_trans_shift + 1] = tn << 4 | tm >> 8 + 
data[cis_trans_shift + 2] = tm + data[cis_trans_shift + 3] = py_nan_int + cis_trans_shift += 4 + if s: # flush buffer data[order_shift] = buffer_o - for py_tuple, b in py_cis_trans_stereo.items(): - n, m = py_tuple - data[cis_trans_shift] = n >> 4 - data[cis_trans_shift + 1] = n << 4 | m >> 8 - data[cis_trans_shift + 2] = m - data[cis_trans_shift + 3] = b - cis_trans_shift += 4 - try: py_pack = data[:size] finally: diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack.pyx index 670f1f7b..aba7ca34 100644 --- a/chython/containers/_unpack.pyx +++ b/chython/containers/_unpack.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: language_level=3 # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,15 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp +from chython.containers import MoleculeContainer from chython.containers.bonds import Bond +from chython.periodictable import (H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, + Mn, Fe, Co, Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, + Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, + Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, + Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, Bh, Hs, + Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og) + # Format specification:: # @@ -57,20 +65,17 @@ from chython.containers.bonds import Bond @cython.wraparound(False) def unpack(const unsigned char[::1] data not None): cdef char *charges - cdef unsigned char a, b, c, d, isotope, atomic_number, neighbors_count, s = 0, nc, version - cdef unsigned char *atoms, *hydrogens, *neighbors, *orders, *is_tet, *is_all - cdef bint *stereo_sign, *ct_sign, *radicals + cdef unsigned char a, b, c, d, 
isotope, atomic_number, neighbors_count, s = 0, version, stereo, hydrogens + cdef unsigned char *neighbors, *orders + cdef bint *ct_sign cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, order_count cdef unsigned short i, j, k = 0, n, m, buffer_b, shift = 0 - cdef unsigned short *mapping, *isotopes, *cis_trans_1, *cis_trans_2, *connections + cdef unsigned short *mapping, *cis_trans_1, *cis_trans_2, *connections cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift - cdef double *x_coord, *y_coord cdef unsigned char[4096] seen - cdef object bond, py_n, py_m - cdef dict py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, py_ngb - cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo - cdef list py_mapping, py_atoms, py_isotopes, py_bonds_flat + cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool + cdef dict py_atoms, py_bonds, py_ngb # read header version = data[0] @@ -79,24 +84,16 @@ def unpack(const unsigned char[::1] data not None): cis_trans_count = (b & 0x0f) << 8 | c # allocate memory - charges = PyMem_Malloc(atoms_count * sizeof(char)) - radicals = PyMem_Malloc(atoms_count * sizeof(bint)) - atoms = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - hydrogens = PyMem_Malloc(atoms_count * sizeof(unsigned char)) neighbors = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - is_tet = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - is_all = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - stereo_sign = PyMem_Malloc(atoms_count * sizeof(bint)) mapping = PyMem_Malloc(atoms_count * sizeof(unsigned short)) - isotopes = PyMem_Malloc(atoms_count * sizeof(unsigned short)) - x_coord = PyMem_Malloc(atoms_count * sizeof(double)) - y_coord = PyMem_Malloc(atoms_count * sizeof(double)) - if not charges or not radicals or not atoms or not hydrogens or not neighbors or not is_tet or not is_all: - raise MemoryError() - if not stereo_sign or not mapping or not isotopes or not x_coord or not y_coord: 
+ if not neighbors or not mapping: raise MemoryError() + py_mol = MoleculeContainer() + py_atoms = py_mol._atoms + py_bonds = py_mol._bonds + # unpack atom block to separate attributes arrays for i in range(atoms_count): a, b = data[atoms_shift], data[atoms_shift + 1] @@ -106,34 +103,47 @@ def unpack(const unsigned char[::1] data not None): bonds_count += neighbors_count a, b = data[atoms_shift + 2], data[atoms_shift + 3] - if a >> 7: # tetrahedron bit set - is_tet[i] = 1 - is_all[i] = 0 - stereo_sign[i] = a & 0x40 # mask th bit - else: - is_tet[i] = 0 - if a >> 5: # allene bit set - is_all[i] = 1 - stereo_sign[i] = a & 0x10 # mask al bit - else: - is_all[i] = 0 - - atoms[i] = atomic_number = b & 0x7f + stereo = a >> 4 + if stereo == 0: + py_nan_bool = None + elif stereo == 0b0010: + py_nan_bool = False + elif stereo == 0b0011: + py_nan_bool = True + elif stereo == 0b1000: + py_nan_bool = False + else: # if stereo == 0b1100: + py_nan_bool = True + + atomic_number = b & 0x7f + py_atom = object.__new__(elements[atomic_number]) + py_atoms[n] = py_atom + + py_atom._stereo = py_nan_bool + isotope = (a & 0x0f) << 1 | b >> 7 if isotope: - isotopes[i] = common_isotopes[atomic_number] + isotope + py_atom._isotope = common_isotopes[atomic_number] + isotope else: - isotopes[i] = 0 + py_atom._isotope = None a, b = data[atoms_shift + 4], data[atoms_shift + 5] - x_coord[i] = double_from_bytes(a, b) + py_atom._x = double_from_bytes(a, b) a, b = data[atoms_shift + 6], data[atoms_shift + 7] - y_coord[i] = double_from_bytes(a, b) + py_atom._y = double_from_bytes(a, b) a = data[atoms_shift + 8] - hydrogens[i] = a >> 5 - charges[i] = ((a >> 1) & 0x0f) - 4 - radicals[i] = a & 0x01 + hydrogens = a >> 5 + if hydrogens == 7: + py_atom._hydrogens = None + else: + py_atom._hydrogens = hydrogens + + py_atom._charge = ((a >> 1) & 0x0f) - 4 + if a & 0x01: + py_atom._is_radical = True + else: + py_atom._is_radical = False atoms_shift += 9 # calculate bonds count and pack sections @@ -145,7 
+155,7 @@ def unpack(const unsigned char[::1] data not None): order_count = order_count / 8 + 1 else: order_count /= 8 - elif version == 0: + else: # if version == 0: order_count = bonds_count / 5 if bonds_count % 5: order_count += 1 @@ -193,7 +203,7 @@ def unpack(const unsigned char[::1] data not None): buffer_b = (a & 0x3) << 1 s = 1 i += 2 - elif version == 0: + else: # if version == 0: for j in range(order_shift, cis_trans_shift, 2): # 0 3 3 1 | 2 3 3 a, b = data[j], data[j + 1] @@ -219,77 +229,31 @@ def unpack(const unsigned char[::1] data not None): ct_sign[i] = d # d = 0x01 or 0x00 cis_trans_shift += 4 - # define returned data - py_mapping = [] - py_atoms = [] - py_isotopes = [] - py_charges = {} - py_radicals = {} - py_hydrogens = {} - py_plane = {} - py_atoms_stereo = {} - py_allenes_stereo = {} - py_cis_trans_stereo = {} - py_bonds = {} - py_bonds_flat = [] + for i in range(atoms_count): + n = mapping[i] + py_n = n # shared py int obj - for i in range(atoms_count): - n = mapping[i] - py_n = n # shared py int obj - - # fill intermediate data - py_mapping.append(py_n) - py_atoms.append(atoms[i]) - py_isotopes.append(isotopes[i] or None) - - py_charges[py_n] = charges[i] - py_radicals[py_n] = radicals[i] - if hydrogens[i] == 7: - py_hydrogens[py_n] = None - else: - py_hydrogens[py_n] = hydrogens[i] - - py_plane[py_n] = (x_coord[i], y_coord[i]) - - if is_tet[i]: - py_atoms_stereo[py_n] = stereo_sign[i] - elif is_all[i]: - py_allenes_stereo[py_n] = stereo_sign[i] - - py_bonds[py_n] = py_ngb = {} - seen[n] = 1 - - nc = neighbors[i] - for j in range(shift, shift + nc): - m = connections[j] - py_m = m - if seen[m]: # bond partially exists. need back-connection. 
- py_ngb[py_m] = py_bonds[py_m][py_n] - else: - bond = object.__new__(Bond) - bond._Bond__order = orders[k] + 1 - bond._Bond__n = py_n - bond._Bond__m = py_m - py_ngb[py_m] = bond - py_bonds_flat.append(bond) - k += 1 - shift += nc + py_bonds[py_n] = py_ngb = {} + seen[n] = 1 + + neighbors_count = neighbors[i] + for j in range(shift, shift + neighbors_count): + m = connections[j] + py_m = m + if seen[m]: # bond partially exists. need back-connection. + py_ngb[py_m] = py_bonds[py_m][py_n] + else: + bond = object.__new__(Bond) + bond._order = orders[k] + 1 + py_ngb[py_m] = bond + k += 1 + shift += neighbors_count for i in range(cis_trans_count): py_cis_trans_stereo[(cis_trans_1[i], cis_trans_2[i])] = ct_sign[i] - PyMem_Free(charges) - PyMem_Free(radicals) - PyMem_Free(atoms) - PyMem_Free(hydrogens) PyMem_Free(neighbors) - PyMem_Free(is_tet) - PyMem_Free(is_all) - PyMem_Free(stereo_sign) PyMem_Free(mapping) - PyMem_Free(isotopes) - PyMem_Free(x_coord) - PyMem_Free(y_coord) if bonds_count: PyMem_Free(connections) PyMem_Free(orders) @@ -297,9 +261,7 @@ def unpack(const unsigned char[::1] data not None): PyMem_Free(cis_trans_1) PyMem_Free(cis_trans_2) PyMem_Free(ct_sign) - return (py_mapping, py_atoms, py_isotopes, - py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, - py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo, size, py_bonds_flat) + return py_mol, size cdef short[119] common_isotopes @@ -312,6 +274,14 @@ common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15 254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278] +cdef object[119] elements +elements[:] = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, + Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, + Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, + Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, 
Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, + Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] + + cdef double double_from_bytes(unsigned char a, unsigned char b): cdef bint sign cdef int e diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 984f845c..16cabc46 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -555,36 +555,12 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, if compressed: data = decompress(data) if data[0] in (0, 2): - (mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, bonds, - atoms_stereo, allenes_stereo, cis_trans_stereo, pack_length, bonds_flat) = unpack(data) + mol, pack_length = unpack(data) elif data[0] == 3: - (mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, bonds, - atoms_stereo, allenes_stereo, cis_trans_stereo, pack_length, bonds_flat) = cpack(data) + mol, pack_length = cpack(data) else: raise ValueError('invalid pack header') - mol = object.__new__(cls) - mol._bonds = bonds - mol._plane = plane - mol._charges = charges - mol._radicals = radicals - mol._hydrogens = hydrogens - mol._atoms_stereo = atoms_stereo - mol._allenes_stereo = allenes_stereo - mol._cis_trans_stereo = cis_trans_stereo - - mol._MoleculeContainer__meta = None - mol._MoleculeContainer__name = None - mol._atoms = atoms = {} - - for n, a, i in zip(mapping, atom_numbers, isotopes): - atoms[n] = a = object.__new__(Element.from_atomic_number(a)) - a._Core__isotope = i - a._graph = ref(mol) - a._n = n - for b in bonds_flat: - b._Bond__graph = ref(mol) - if _return_pack_length: return mol, pack_length return mol @@ -610,11 +586,6 @@ def _cpack(self, order=None, check=True): atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo allenes_terminals = self._stereo_allenes_terminals cumulenes = {} From 
4ab39841563c5832fc91b61ce8f772665f49bd26 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 23 Dec 2024 18:11:08 +0100 Subject: [PATCH 49/68] unpacking reimplemented --- chython/containers/_pack.pyx | 3 +- chython/containers/_unpack.pyx | 70 +++++++++++++++------------------- chython/containers/molecule.py | 11 ++++-- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack.pyx index 37b42b34..30ccd1bc 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack.pyx @@ -19,6 +19,7 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp, frexp +from libc.string cimport memset # Format V2 specification:: # @@ -114,7 +115,7 @@ def pack(object molecule): if not data: raise MemoryError() - seen[:] = 0 # erase random data + memset(seen, 0, 4096 * sizeof(unsigned char)) # erase random data # start pack collection data[0] = 2 # header. specification version 2 diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack.pyx index aba7ca34..80ab6c59 100644 --- a/chython/containers/_unpack.pyx +++ b/chython/containers/_unpack.pyx @@ -64,18 +64,17 @@ from chython.periodictable import (H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, @cython.cdivision(True) @cython.wraparound(False) def unpack(const unsigned char[::1] data not None): - cdef char *charges cdef unsigned char a, b, c, d, isotope, atomic_number, neighbors_count, s = 0, version, stereo, hydrogens cdef unsigned char *neighbors, *orders - cdef bint *ct_sign cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, order_count cdef unsigned short i, j, k = 0, n, m, buffer_b, shift = 0 - cdef unsigned short *mapping, *cis_trans_1, *cis_trans_2, *connections + cdef unsigned short *mapping, *connections cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift cdef unsigned char[4096] seen cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool cdef dict 
py_atoms, py_bonds, py_ngb + cdef list py_cis_trans # read header version = data[0] @@ -93,6 +92,7 @@ def unpack(const unsigned char[::1] data not None): py_mol = MoleculeContainer() py_atoms = py_mol._atoms py_bonds = py_mol._bonds + py_cis_trans = [] # unpack atom block to separate attributes arrays for i in range(atoms_count): @@ -135,9 +135,9 @@ def unpack(const unsigned char[::1] data not None): a = data[atoms_shift + 8] hydrogens = a >> 5 if hydrogens == 7: - py_atom._hydrogens = None + py_atom._implicit_hydrogens = None else: - py_atom._hydrogens = hydrogens + py_atom._implicit_hydrogens = hydrogens py_atom._charge = ((a >> 1) & 0x0f) - 4 if a & 0x01: @@ -214,21 +214,6 @@ def unpack(const unsigned char[::1] data not None): orders[i + 4] = b & 0x7 i += 5 - if cis_trans_count: - cis_trans_1 = PyMem_Malloc(cis_trans_count * sizeof(unsigned short)) - cis_trans_2 = PyMem_Malloc(cis_trans_count * sizeof(unsigned short)) - ct_sign = PyMem_Malloc(cis_trans_count * sizeof(bint)) - if not cis_trans_1 or not cis_trans_2 or not ct_sign: - raise MemoryError() - - for i in range(cis_trans_count): - a, b = data[cis_trans_shift], data[cis_trans_shift + 1] - c, d = data[cis_trans_shift + 2], data[cis_trans_shift + 3] - cis_trans_1[i] = a << 4 | b >> 4 - cis_trans_2[i] = (b & 0x0f) << 8 | c - ct_sign[i] = d # d = 0x01 or 0x00 - cis_trans_shift += 4 - for i in range(atoms_count): n = mapping[i] py_n = n # shared py int obj @@ -243,25 +228,31 @@ def unpack(const unsigned char[::1] data not None): if seen[m]: # bond partially exists. need back-connection. 
py_ngb[py_m] = py_bonds[py_m][py_n] else: - bond = object.__new__(Bond) - bond._order = orders[k] + 1 - py_ngb[py_m] = bond + py_bond = object.__new__(Bond) + py_bond._order = orders[k] + 1 + py_bond._stereo = None + py_ngb[py_m] = py_bond k += 1 shift += neighbors_count - for i in range(cis_trans_count): - py_cis_trans_stereo[(cis_trans_1[i], cis_trans_2[i])] = ct_sign[i] + PyMem_Free(orders) + PyMem_Free(connections) + + if cis_trans_count: + for i in range(cis_trans_count): + a, b = data[cis_trans_shift], data[cis_trans_shift + 1] + c, d = data[cis_trans_shift + 2], data[cis_trans_shift + 3] + py_n = a << 4 | b >> 4 + py_m = (b & 0x0f) << 8 | c + if d: + py_cis_trans.append((py_n, py_m, True)) + else: + py_cis_trans.append((py_n, py_m, False)) + cis_trans_shift += 4 PyMem_Free(neighbors) PyMem_Free(mapping) - if bonds_count: - PyMem_Free(connections) - PyMem_Free(orders) - if cis_trans_count: - PyMem_Free(cis_trans_1) - PyMem_Free(cis_trans_2) - PyMem_Free(ct_sign) - return py_mol, size + return py_mol, py_cis_trans, size cdef short[119] common_isotopes @@ -273,13 +264,12 @@ common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15 222, 221, 228, 227, 231, 231, 235, 236, 241, 242, 243, 244, 245, 254, 253, 254, 254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278] - -cdef object[119] elements -elements[:] = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, - Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, - Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, - Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, - Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] +cdef list elements +elements = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, + Ni, Cu, Zn, Ga, Ge, As, Se, Br, 
Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, + Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, + Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, + Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] cdef double double_from_bytes(unsigned char a, unsigned char b): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 16cabc46..1f607829 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -542,7 +542,7 @@ def pack_len(cls, data: bytes, /, *, compressed=True) -> int: return int.from_bytes(data[1:3], 'big') >> 4 @classmethod - def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, + def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labels_calculation=False, _return_pack_length=False) -> 'MoleculeContainer': """ Unpack from compressed bytes. @@ -555,12 +555,17 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, if compressed: data = decompress(data) if data[0] in (0, 2): - mol, pack_length = unpack(data) + mol, cis_trans, pack_length = unpack(data) + for n, m, s in cis_trans: + mol.bond(*mol._stereo_cis_trans_centers[n])._stereo = s elif data[0] == 3: - mol, pack_length = cpack(data) + mol, cis_trans, pack_length = cpack(data) else: raise ValueError('invalid pack header') + if not skip_labels_calculation: + mol.calc_labels() + if _return_pack_length: return mol, pack_length return mol From 24ce4ece6523d381e10f3eb22e84589163050431 Mon Sep 17 00:00:00 2001 From: stsouko Date: Tue, 24 Dec 2024 13:41:41 +0100 Subject: [PATCH 50/68] modules structure refactored --- build.py | 14 +++++++------- chython/containers/{_pack.pyx => _pack_v2.pyx} | 18 ------------------ .../{_unpack.pyx => _unpack_v0v2.pyx} | 0 .../containers/{_cpack.pyx => _unpack_v3.pyx} | 0 chython/containers/molecule.py | 16 ++++++++-------- 5 files 
changed, 15 insertions(+), 33 deletions(-) rename chython/containers/{_pack.pyx => _pack_v2.pyx} (96%) rename chython/containers/{_unpack.pyx => _unpack_v0v2.pyx} (100%) rename chython/containers/{_cpack.pyx => _unpack_v3.pyx} (100%) diff --git a/build.py b/build.py index f43339df..7f484611 100644 --- a/build.py +++ b/build.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023, 2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -48,14 +48,14 @@ Extension('chython.algorithms._isomorphism', ['chython/algorithms/_isomorphism.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._pack', - ['chython/containers/_pack.pyx'], + Extension('chython.containers._pack_v2', + ['chython/containers/_pack_v2.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._unpack', - ['chython/containers/_unpack.pyx'], + Extension('chython.containers._unpack_v0v2', + ['chython/containers/_unpack_v0v2.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._cpack', - ['chython/containers/_cpack.pyx'], + Extension('chython.containers._unpack_v3', + ['chython/containers/_unpack_v3.pyx'], extra_compile_args=extra_compile_args), Extension('chython.files._xyz', ['chython/files/_xyz.pyx'], diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack_v2.pyx similarity index 96% rename from chython/containers/_pack.pyx rename to chython/containers/_pack_v2.pyx index 30ccd1bc..6e2a8b19 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack_v2.pyx @@ -49,24 +49,6 @@ from libc.string cimport memset # 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. 
# 1 bit - sign -# stereo block: -# 0000 - no stereo -# 0001 - not in use -# 0010 - allene -# 0011 - allene -# 0100 - not in use -# 0101 - not in use -# 0110 - not in use -# 0111 - not in use -# 1000 - tetrahedron -# 1001 - not in use -# 1010 - not in use -# 1011 - not in use -# 1100 - tetrahedron -# 1101 - not in use -# 1110 - not in use -# 1111 - not in use - @cython.nonecheck(False) @cython.boundscheck(False) diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack_v0v2.pyx similarity index 100% rename from chython/containers/_unpack.pyx rename to chython/containers/_unpack_v0v2.pyx diff --git a/chython/containers/_cpack.pyx b/chython/containers/_unpack_v3.pyx similarity index 100% rename from chython/containers/_cpack.pyx rename to chython/containers/_unpack_v3.pyx diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 1f607829..ba8234a6 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -506,7 +506,7 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non :param version: format version :param order: atom order in V3 """ - from ._pack import pack + from ._pack_v2 import pack as pack_v2 if check: bonds = self._bonds @@ -518,9 +518,9 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non raise ValueError('To many neighbors not supported') if version == 2: - data = pack(self) + data = pack_v2(self) elif version == 3: - data = self._cpack(order, check) + data = self._pack_v3(order, check) else: raise ValueError('invalid specification version') if compressed: @@ -549,17 +549,17 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labe :param compressed: decompress data before processing. 
""" - from ._unpack import unpack - from ._cpack import unpack as cpack + from ._unpack_v0v2 import unpack as unpack_v0v2 + from ._unpack_v3 import unpack as unpack_v3 if compressed: data = decompress(data) if data[0] in (0, 2): - mol, cis_trans, pack_length = unpack(data) + mol, cis_trans, pack_length = unpack_v0v2(data) for n, m, s in cis_trans: mol.bond(*mol._stereo_cis_trans_centers[n])._stereo = s elif data[0] == 3: - mol, cis_trans, pack_length = cpack(data) + mol, cis_trans, pack_length = unpack_v3(data) else: raise ValueError('invalid pack header') @@ -580,7 +580,7 @@ def unpach(cls, data: Union[bytes, memoryview], /, *, compressed=True) -> 'Molec def __bytes__(self): return self.pack() - def _cpack(self, order=None, check=True): + def _pack_v3(self, order=None, check=True): if order is None: order = list(self._atoms) elif check: From 7281fe81ebfb52499609c8a16c78f815eb2f6ba9 Mon Sep 17 00:00:00 2001 From: stsouko Date: Tue, 24 Dec 2024 14:03:14 +0100 Subject: [PATCH 51/68] V3 specification under change --- chython/containers/molecule.py | 117 ++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 52 deletions(-) diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index ba8234a6..695852b7 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -20,7 +20,6 @@ from collections import Counter, defaultdict from functools import cached_property from typing import Dict, Iterable, List, Optional, Tuple, Union -from weakref import ref from zlib import compress, decompress from .bonds import Bond, DynamicBond, QueryBond from .cgr import CGRContainer @@ -476,31 +475,6 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. 
1 bit - sign - Format V3 specification:: - - Big endian bytes order - 8 bit - 0x03 (format specification version) - Atom block 3 bytes (repeated): - 1 bit - atom entrance flag (always 1) - 7 bit - atomic number (<=118) - 3 bit - hydrogens (0-7). Note: 7 == None - 4 bit - charge (charge + 4. possible range -4 - 4) - 1 bit - radical state - 1 bit padding - 3 bit tetrahedron/allene sign - (000 - not stereo or unknown, 001 - pure-unknown-enantiomer, 010 or 011 - has stereo) - 4 bit - number of following bonds and CT blocks (0-15) - - Bond block 2 bytes (repeated 0-15 times) - 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) - 4 bit - bond order: 0000 - single, 0001 - double, 0010 - triple, 0011 - aromatic, 0111 - special - - Cis-Trans 2 bytes - 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) - 4 bit - CT sign: 1000 or 1001 - to avoid overlap with bond - - V2 format is faster than V3. V3 format doesn't include isotopes, atom numbers and XY coordinates. - :param compressed: return zlib-compressed pack. :param check: check molecule for format restrictions. :param version: format version @@ -581,6 +555,44 @@ def __bytes__(self): return self.pack() def _pack_v3(self, order=None, check=True): + """ + Format V3 specification: + Big endian bytes order + 8 bit - 0x03 (format specification version) + Atom block 3 bytes (repeated): + 1 bit - atom entrance flag (always 1) + 7 bit - atomic number (<=118) + 3 bit - hydrogens (0-7). Note: 7 == None + 4 bit - charge (charge + 4. possible range -4 - 4) + 1 bit - radical state + 4 bit - atom stereo + ANDx and ORx encode only sign. X value stored in the same order in Stereo group block. 
+ 0000 [same as V2] - no stereo or unknown + 0001 - not used + 0010 - absolute sign False + 0011 - absolute sign True + 0100 - sign False OR1 group + 0101 - sign True OR1 group + 0110 - sign False AND1 group + 0111 - sign True AND1 group + 1000 - sign False OR2 group + 1001 - sign True OR2 group + 1010 - sign False AND2 group + 1011 - sign True AND2 group + 1100 - sign False ORx group + 1101 - sign True ORx group + 1110 - sign False ANDx group + 1111 - sign True ANDx group + 4 bit - number of following bonds and CT blocks (0-15) + + Bond block 2 bytes (repeated 0-15 times) + 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) + 4 bit - bond order: 0000 - single, 0001 - double, 0010 - triple, 0011 - aromatic, 0111 - special + + Cis-Trans 2 bytes + 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) + 4 bit - CT sign: 1000 or 1001 - to avoid overlap with bond + """ if order is None: order = list(self._atoms) elif check: @@ -610,42 +622,43 @@ def _pack_v3(self, order=None, check=True): data = [b'\x03'] for i, n in enumerate(order): seen[n] = i + atom = atoms[n] env = bonds[n] - data.append((0x80 | atoms[n].atomic_number).to_bytes(1, 'big')) + data.append((0x80 | atom.atomic_number).to_bytes(1, 'big')) # 3 bit - hydrogens (0-6, None) | 4 bit - charge | 1 bit - radical - hcr = (charges[n] + 4) << 1 | radicals[n] - if (h := hydrogens[n]) is None: + hcr = (atom.charge + 4) << 1 | atom.is_radical + if atom.implicit_hydrogens is None: hcr |= 0b11100000 else: - hcr |= h << 5 + hcr |= atom.implicit_hydrogens << 5 data.append(hcr.to_bytes(1, 'big')) - if n in atoms_stereo: - if self._translate_tetrahedron_sign(n, [x for x in order if x in env]): - s = 0b0011_0000 - else: - s = 0b0010_0000 - elif n in allenes_stereo: - t1, t2 = allenes_terminals[n] - nn = None - for x in order: - if nn is None: - if x in cumulenes[t1]: - nn = x - flag = True - elif x in cumulenes[t2]: - flag = 
False - nn = x - elif flag: # noqa - if x in cumulenes[t2]: + if atom.stereo is not None: + if len(env) == 2: + t1, t2 = allenes_terminals[n] + nn = None + for x in order: + if nn is None: + if x in cumulenes[t1]: + nn = x + flag = True + elif x in cumulenes[t2]: + flag = False + nn = x + elif flag: # noqa + if x in cumulenes[t2]: + nm = x + break + elif x in cumulenes[t1]: nm = x break - elif x in cumulenes[t1]: - nm = x - break - if self._translate_allene_sign(n, nn, nm): # noqa + if self._translate_allene_sign(n, nn, nm): # noqa + s = 0b0011_0000 + else: + s = 0b0010_0000 + elif self._translate_tetrahedron_sign(n, [x for x in order if x in env]): s = 0b0011_0000 else: s = 0b0010_0000 From 9b6b55a2294521e41c657d1251cd4e0e2261595a Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 25 Dec 2024 23:23:46 +0100 Subject: [PATCH 52/68] Remove unused ring_atoms prop. Removed redundant `__hash__` implementations from query elements. Morgan hashes refactored. Will affect generated smiles strings. Implemented rings_graph as next level of skin graph. 
--- chython/algorithms/morgan.py | 3 +- chython/algorithms/rings.py | 79 ++++++++++++++------------- chython/containers/molecule.py | 6 +- chython/periodictable/base/element.py | 3 +- chython/periodictable/base/query.py | 15 ----- 5 files changed, 46 insertions(+), 60 deletions(-) diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index c56b5572..8c8c1b30 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -44,8 +44,7 @@ def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: return {} elif len(self) == 1: # optimize single atom containers return dict.fromkeys(self, 1) - ring = self.ring_atoms - return _morgan({n: hash((hash(a), n in ring)) for n, a in self.atoms()}, self.int_adjacency) + return _morgan({n: hash(a) for n, a in self.atoms()}, self.int_adjacency) @cached_property def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 4871d5fa..d4b9c1c2 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -51,7 +51,7 @@ def sssr(self) -> List[Tuple[int, ...]]: @cached_property def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: """ - A dictionary with atom numbers as keys and a list of tuples (representing rings) as values. + A dictionary with atom numbers as keys and a list of tuples (representing SSSR rings) as values. """ rings = defaultdict(list) for r in self.sssr: @@ -62,46 +62,10 @@ def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: @cached_property def atoms_rings_sizes(self) -> Dict[int, Set[int]]: """ - Sizes of rings containing atom. + Sizes of SSSR rings containing atom. """ return {n: {len(r) for r in rs} for n, rs in self.atoms_rings.items()} - @cached_property - def ring_atoms(self) -> Set[int]: - """ - Atoms in rings. Not SSSR based fast algorithm. 
- """ - bonds = _skin_graph(self.not_special_connectivity) - if not bonds: - return set() - - in_rings = set() - atoms = set(bonds) - while atoms: - stack = deque([(atoms.pop(), 0, 0)]) - path = [] - seen = set() - while stack: - c, p, d = stack.pop() - if len(path) > d: - path = path[:d] - if c in in_rings: - continue - path.append(c) - seen.add(c) - - d += 1 - for n in bonds[c]: - if n == p: - continue - elif n in seen: - in_rings.update(path[path.index(n):]) - else: - stack.append((n, c, d)) - - atoms.difference_update(seen) - return in_rings - @cached_property def rings_count(self) -> int: """ @@ -144,6 +108,45 @@ def skin_graph(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ return _skin_graph(self._bonds) + @cached_property + def rings_graph(self: 'MoleculeContainer'): + """ + Graph of rings. Linkers are not included. Special bonds are considered. + """ + bonds = self.skin_graph + if not bonds: + return bonds + + in_rings = set() + atoms = set(bonds) + while atoms: + stack = deque([(atoms.pop(), 0, 0)]) + path = [] + seen = set() + while stack: + c, p, d = stack.pop() + if len(path) > d: + path = path[:d] + if c in in_rings: + continue + path.append(c) + seen.add(c) + + d += 1 + for n in bonds[c]: + if n == p: + continue + elif n in seen: + in_rings.update(path[path.index(n):]) + else: + stack.append((n, c, d)) + + atoms.difference_update(seen) + for n in bonds.keys() - in_rings: + for m in bonds.pop(n): + bonds[m].discard(n) + return bonds + def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> List[Tuple[int, ...]]: """ diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 695852b7..d16d6e40 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -251,8 +251,7 @@ def copy(self, *, keep_sssr=False, keep_components=False) -> 'MoleculeContainer' if keep_sssr: for k, v in self.__dict__.items(): - if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', - 'ring_atoms', 
'not_special_connectivity', 'rings_count'): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', 'not_special_connectivity', 'rings_count'): copy.__dict__[k] = v if keep_components: if 'connected_components' in self.__dict__: @@ -840,8 +839,7 @@ def flush_cache(self, *, keep_sssr=False, keep_components=False): if keep_sssr: # good to keep if no new bonds or bonds deletions or bonds to/from any change for k, v in self.__dict__.items(): - if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', - 'ring_atoms', 'not_special_connectivity', 'rings_count'): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', 'not_special_connectivity', 'rings_count'): backup[k] = v if keep_components: # good to keep if no new bonds or bonds deletions diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 1185d661..1d066f07 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -342,7 +342,8 @@ def __eq__(self, other): self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical def __hash__(self): - return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.implicit_hydrogens or 0)) + return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, + self.implicit_hydrogens or 0, self.in_ring)) def valence_rules(self, valence: int) -> \ List[Tuple[Set[Tuple[int, 'Element']], Dict[Tuple[int, 'Element'], int], int]]: diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 70d1588e..c955da28 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -236,9 +236,6 @@ def __eq__(self, other): return False return True - def __hash__(self): - return hash((self.neighbors, self.hybridization)) - class AnyElement(ExtendedQuery): __slots__ = () @@ -273,10 +270,6 @@ def __eq__(self, other): return False return True - def __hash__(self): - return 
hash((self.charge, self.is_radical, self.neighbors, self.hybridization, - self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - class ListElement(ExtendedQuery): __slots__ = ('_elements', '__dict__') @@ -339,10 +332,6 @@ def __eq__(self, other): return False return True - def __hash__(self): - return hash((self.atomic_numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, - self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - def __repr__(self): return f'{self.__class__.__name__}([{self.atomic_symbol}])' @@ -474,9 +463,5 @@ def __eq__(self, other): return False return True - def __hash__(self): - return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, - self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - __all__ = ['Query', 'ExtendedQuery', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] From c1be11a0f813323e8d5f81f849b4251cff7df4c9 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 19 Jan 2025 17:07:46 +0100 Subject: [PATCH 53/68] isomorphism extension refactored into structs --- chython/algorithms/_isomorphism.pyx | 179 ++++++++++++++++------------ chython/algorithms/isomorphism.py | 35 ++++-- 2 files changed, 132 insertions(+), 82 deletions(-) diff --git a/chython/algorithms/_isomorphism.pyx b/chython/algorithms/_isomorphism.pyx index f701f4e5..3d39f200 100644 --- a/chython/algorithms/_isomorphism.pyx +++ b/chython/algorithms/_isomorphism.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # Copyright 2021 Aleksandr Sizov # This file is part of chython. 
# @@ -24,52 +24,85 @@ from libc.string cimport memset cdef extern from "Python.h": dict _PyDict_NewPresized(Py_ssize_t minused) +cdef packed struct atom_t: + unsigned long long bits1 + unsigned long long bits2 + unsigned long long bits3 + unsigned long long bits4 + unsigned int from_ + unsigned int to_ + unsigned int mapping + +cdef packed struct bond_t: + unsigned long long bond + unsigned int index + +cdef packed struct molecule_t: + unsigned int atoms_count + atom_t *atoms + bond_t *bonds + +cdef packed struct q_atom_t: + unsigned long long mask1 + unsigned long long mask2 + unsigned long long mask3 + unsigned long long mask4 + unsigned int back + unsigned int closure + unsigned int from_ + unsigned int to_ + unsigned int mapping + +cdef packed struct query_t: + unsigned int atoms_count + q_atom_t *atoms + bond_t *bonds + @cython.boundscheck(False) @cython.wraparound(False) -def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back not None, - unsigned long long[::1] q_masks1 not None, unsigned long long[::1] q_masks2 not None, - unsigned long long[::1] q_masks3 not None, unsigned long long[::1] q_masks4 not None, - unsigned int[::1] q_closures not None, unsigned int[::1] q_from not None, - unsigned int[::1] q_to not None, unsigned int[::1] q_indices not None, - unsigned long long[::1] q_bonds not None, unsigned long[::1] o_numbers not None, - unsigned long long[::1] o_bits1 not None, unsigned long long[::1] o_bits2 not None, - unsigned long long[::1] o_bits3 not None, unsigned long long[::1] o_bits4 not None, - unsigned long long[::1] o_bonds not None, unsigned int[::1] o_from not None, - unsigned int[::1] o_to not None, unsigned int[::1] o_indices not None, - unsigned int[::1] scope not None): +def get_mapping(const unsigned char[::1] q_buffer not None, const unsigned char[::1] m_buffer not None, + const unsigned int[::1] scope not None): # expected less than 2^16 atoms in structure. 
- cdef unsigned int stack = 0, path_size = 0, q_size, q_size_dec, o_size, depth, front, back, closures_num - cdef unsigned int n, m, o, i, j, closures_counter - cdef unsigned long long q_mask1, q_mask2, q_mask3, q_mask4, o_bond, c_bond + cdef unsigned int stack = 0, path_size = 0, depth, front, q_decrement + cdef unsigned int n, m, i, j, closures_counter + cdef unsigned long long c_bond cdef dict mapping - - q_size = len(q_numbers) - q_size_dec = q_size - 1 - o_size = len(o_numbers) - cdef unsigned int *path = PyMem_Malloc(q_size_dec * sizeof(unsigned int)) - cdef unsigned int *stack_index = PyMem_Malloc(2 * o_size * sizeof(unsigned int)) - cdef unsigned int *stack_depth = PyMem_Malloc(2 * o_size * sizeof(unsigned int)) - cdef bint *matched = PyMem_Malloc(o_size * sizeof(bint)) - cdef unsigned long long *o_closures = PyMem_Malloc(o_size * sizeof(unsigned long long)) - - if not path or not stack_index or not stack_depth or not matched or not o_closures: + cdef query_t query + cdef molecule_t molecule + cdef q_atom_t q_atom + cdef atom_t n_atom, m_atom + cdef bond_t i_bond, j_bond + + query.atoms_count = ( &q_buffer[0])[0] + query.atoms = (&q_buffer[0] + 4) + query.bonds = (&query.atoms[0] + query.atoms_count) + q_decrement = query.atoms_count - 1 + + molecule.atoms_count = ( &m_buffer[0])[0] + molecule.atoms = (&m_buffer[0] + 4) + molecule.bonds = (&molecule.atoms[0] + molecule.atoms_count) + + cdef unsigned int *path = PyMem_Malloc(q_decrement * sizeof(unsigned int)) + cdef unsigned int *stack_index = PyMem_Malloc(2 * molecule.atoms_count * sizeof(unsigned int)) + cdef unsigned int *stack_depth = PyMem_Malloc(2 * molecule.atoms_count * sizeof(unsigned int)) + cdef bint *matched = PyMem_Malloc(molecule.atoms_count * sizeof(bint)) + cdef unsigned long long *closures = PyMem_Malloc(molecule.atoms_count * sizeof(unsigned long long)) + + if not path or not stack_index or not stack_depth or not matched or not closures: raise MemoryError() - memset(matched, 0, o_size * 
sizeof(bint)) - memset(o_closures, 0, o_size * sizeof(unsigned long long)) + memset(matched, 0, molecule.atoms_count * sizeof(bint)) + memset(closures, 0, molecule.atoms_count * sizeof(unsigned long long)) - # find entry-points. - q_mask1 = q_masks1[0] - q_mask2 = q_masks2[0] - q_mask3 = q_masks3[0] - q_mask4 = q_masks4[0] - for n in range(o_size): + q_atom = query.atoms[0] + for n in range(molecule.atoms_count): + n_atom = molecule.atoms[n] if (scope[n] and - q_mask1 & o_bits1[n] and # o_bits1 doesn't contain bond bits. - q_mask2 & o_bits2[n] == o_bits2[n] and - q_mask3 & o_bits3[n] == o_bits3[n] and - q_mask4 & o_bits4[n]): + q_atom.mask1 & n_atom.bits1 and # bits1 doesn't contain bond bits. + q_atom.mask2 & n_atom.bits2 == n_atom.bits2 and + q_atom.mask3 & n_atom.bits3 == n_atom.bits3 and + q_atom.mask4 & n_atom.bits4): stack_index[stack] = n stack_depth[stack] = 0 @@ -81,11 +114,11 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back depth = stack_depth[stack] n = stack_index[stack] - if depth == q_size_dec: - mapping = _PyDict_NewPresized(q_size) + if depth == q_decrement: + mapping = _PyDict_NewPresized(query.atoms_count) for i in range(depth): - mapping[q_numbers[i]] = o_numbers[path[i]] - mapping[q_numbers[depth]] = o_numbers[n] + mapping[query.atoms[i].mapping] = molecule.atoms[path[i]].mapping + mapping[query.atoms[depth].mapping] = molecule.atoms[n].mapping yield mapping else: if path_size != depth: # dead end reached @@ -98,40 +131,37 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back path_size += 1 front = depth + 1 - back = q_back[front] - if back != depth: # branch - n = path[back] - # load next query atom - q_mask1 = q_masks1[front] - q_mask2 = q_masks2[front] - q_mask3 = q_masks3[front] - q_mask4 = q_masks4[front] - closures_num = q_closures[front] - - for i in range(o_from[n], o_to[n]): - o_bond = o_bonds[i] - m = o_indices[i] + q_atom = query.atoms[front] + if q_atom.back != depth: # 
branch + n = path[q_atom.back] + + n_atom = molecule.atoms[n] + for i in range(n_atom.from_, n_atom.to_): + i_bond = molecule.bonds[i] + m = i_bond.index + m_atom = molecule.atoms[m] if (scope[m] and not matched[m] and - q_mask1 & o_bond == o_bond and # bond order, in ring mark and atom bit should match. - q_mask2 & o_bits2[m] == o_bits2[m] and - q_mask3 & o_bits3[m] == o_bits3[m] and - q_mask4 & o_bits4[m]): + q_atom.mask1 & i_bond.bond == i_bond.bond and # bond order, in ring mark and atom bit should match. + q_atom.mask2 & m_atom.bits2 == m_atom.bits2 and + q_atom.mask3 & m_atom.bits3 == m_atom.bits3 and + q_atom.mask4 & m_atom.bits4): - if closures_num: # candidate atom should have same closures. + if q_atom.closure: # candidate atom should have same closures. closures_counter = 0 # make a map of closures for o_n atom # an index is a neighbor atom and a value is a bond between o_n and the neighbor - for j in range(o_from[m], o_to[m]): - o = o_indices[j] - if o != n and matched[o]: - o_closures[o] = o_bonds[j] + for j in range(m_atom.from_, m_atom.to_): + j_bond = molecule.bonds[j] + if j_bond.index != n and matched[j_bond.index]: + closures[j_bond.index] = j_bond.bond closures_counter += 1 - if closures_counter == closures_num: - for j in range(q_from[front], q_to[front]): - c_bond = o_closures[path[q_indices[j]]] - if not c_bond or q_bonds[j] & c_bond != c_bond: # compare order and ring bits + if closures_counter == q_atom.closure: + for j in range(q_atom.from_, q_atom.to_): + j_bond = query.bonds[j] + c_bond = closures[path[j_bond.index]] + if not c_bond or j_bond.bond & c_bond != c_bond: # compare order and ring bits break else: stack_index[stack] = m @@ -139,12 +169,13 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back stack += 1 # fill an array with nulls - for j in range(o_from[m], o_to[m]): - o_closures[o_indices[j]] = 0 + for j in range(m_atom.from_, m_atom.to_): + j_bond = molecule.bonds[j] + closures[j_bond.index] = 0 
else: # candidate atom should not have closures. - for j in range(o_from[m], o_to[m]): - o = o_indices[j] - if o != n and matched[o]: + for j in range(m_atom.from_, m_atom.to_): + j_bond = molecule.bonds[j] + if j_bond.index != n and matched[j_bond.index]: break # found closure else: stack_index[stack] = m @@ -155,4 +186,4 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back PyMem_Free(matched) PyMem_Free(stack_index) PyMem_Free(stack_depth) - PyMem_Free(o_closures) + PyMem_Free(closures) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index dc062591..1dfdfe20 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2024 Ramil Nugmanov +# Copyright 2018-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,7 +19,9 @@ from array import array from collections import defaultdict, deque from functools import cached_property, partial +from io import BytesIO from itertools import permutations +from struct import Struct from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement @@ -30,6 +32,12 @@ from chython.containers import MoleculeContainer +header_struct = Struct('I') +m_atom_struct = Struct('QQQQIII') +q_atom_struct = Struct('QQQQIIIII') +bond_struct = Struct('QI') + + class Isomorphism: __slots__ = () @@ -260,8 +268,13 @@ def _cython_compiled_structure(self: 'MoleculeContainer'): start += len(ms) o_to[i] = start - return (array('L', numbers), array('Q', bits1), array('Q', bits2), array('Q', bits3), array('Q', bits4), - array('Q', bonds), array('I', o_from), array('I', o_to), array('I', indices)) + buffer = BytesIO() + buffer.write(header_struct.pack(len(numbers))) + for x in zip(bits1, 
bits2, bits3, bits4, o_from, o_to, numbers): + buffer.write(m_atom_struct.pack(*x)) + for x in zip(bonds, indices): + buffer.write(bond_struct.pack(*x)) + return buffer.getvalue() class QueryIsomorphism(Isomorphism): @@ -291,7 +304,7 @@ def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: boo components = self._cython_compiled_query # override to cython data def get_mapping(query, scope): - return _cython_get_mapping(*query, *other._cython_compiled_structure, + return _cython_get_mapping(query, other._cython_compiled_structure, array('I', [n in scope for n in other])) else: components = get_mapping = None @@ -536,10 +549,16 @@ def _cython_compiled_query(self): indices[j] = mapping[m] start += len(ms) q_to[i] = start - components.append((array('L', [n for n, *_ in c]), array('I', [0] + [mapping[x] for _, x, *_ in c[1:]]), - array('Q', masks1), array('Q', masks2), array('Q', masks3), array('Q', masks4), - array('I', closures), array('I', q_from), array('I', q_to), - array('I', indices), array('Q', bonds))) + + back = [0] + [mapping[x] for _, x, *_ in c[1:]] + numbers = [n for n, *_ in c] + buffer = BytesIO() + buffer.write(header_struct.pack(len(numbers))) + for x in zip(masks1, masks2, masks3, masks4, back, closures, q_from, q_to, numbers): + buffer.write(q_atom_struct.pack(*x)) + for x in zip(bonds, indices): + buffer.write(bond_struct.pack(*x)) + components.append(buffer.getvalue()) return components From 673debf6053fe2970d5f5989175dc3b4cc27caf8 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 19 Jan 2025 17:25:42 +0100 Subject: [PATCH 54/68] clean2d module refactored --- README.rst | 2 - chython/algorithms/calculate2d/__init__.py | 188 +-------------------- chython/algorithms/calculate2d/molecule.py | 149 ++++++++++++++++ chython/algorithms/calculate2d/reaction.py | 80 +++++++++ chython/algorithms/rings.py | 6 +- chython/containers/_pack_v2.pyx | 6 +- chython/containers/_unpack_v0v2.pyx | 11 +- chython/containers/molecule.py | 5 +- 
chython/periodictable/base/element.py | 33 ++-- chython/periodictable/base/vector.py | 119 +++++++++++++ chython/utils/grid.py | 7 +- chython/utils/retro.py | 7 +- 12 files changed, 384 insertions(+), 229 deletions(-) create mode 100644 chython/algorithms/calculate2d/molecule.py create mode 100644 chython/algorithms/calculate2d/reaction.py create mode 100644 chython/periodictable/base/vector.py diff --git a/README.rst b/README.rst index bbe8d4e8..0040e8b7 100644 --- a/README.rst +++ b/README.rst @@ -31,8 +31,6 @@ Install Only python 3.8+. -Note: for using `clean2d` install NodeJS into system. - * **stable version available through PyPI**:: pip install chython diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index c8fe17a5..f0c35a13 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov -# Copyright 2019, 2020 Dinar Batyrshin +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,189 +16,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from math import sqrt -from random import random -from typing import TYPE_CHECKING, Union -from ...exceptions import ImplementationError - - -try: - from importlib.resources import files -except ImportError: # python3.8 - from importlib_resources import files - - -if TYPE_CHECKING: - from chython import ReactionContainer, MoleculeContainer - -try: - from py_mini_racer.py_mini_racer import MiniRacer, JSEvalException - - ctx = MiniRacer() - ctx.eval('const self = this') - ctx.eval(files(__package__).joinpath('clean2d.js').read_text()) -except RuntimeError: - ctx = None - - -class Calculate2DMolecule: - __slots__ = () - - def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): - """ - Calculate 2d layout of graph. https://pubs.acs.org/doi/10.1021/acs.jcim.7b00425 JS implementation used. - """ - if ctx is None: - raise ImportError('py_mini_racer is not installed or broken') - plane = {} - entry = iter(sorted(self, key=lambda n: len(self._bonds[n]))) - for _ in range(min(5, len(self))): - smiles, order = self.__clean2d_prepare(next(entry)) - try: - xy = ctx.call('$.clean2d', smiles) - except JSEvalException: - continue - break - else: - raise ImplementationError - - shift_x, shift_y = xy[0] - for n, (x, y) in zip(order, xy): - plane[n] = (x - shift_x, shift_y - y) - - bonds = [] - for n, m, _ in self.bonds(): - xn, yn = plane[n] - xm, ym = plane[m] - bonds.append(sqrt((xm - xn) ** 2 + (ym - yn) ** 2)) - if bonds: - bond_reduce = sum(bonds) / len(bonds) / .825 - else: - bond_reduce = 1. - - atoms = self._atoms - for n, (x, y) in plane.items(): - a = atoms[n] - a._x = x / bond_reduce - a._y = y / bond_reduce - - if self.connected_components_count > 1: - shift_x = 0. 
- for c in self.connected_components: - shift_x = self._fix_plane_mean(shift_x, component=c) + .9 - self.__dict__.pop('__cached_method__repr_svg_', None) - - def _fix_plane_mean(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - atoms = self._atoms - if component is None: - component = atoms - - left_atom = atoms[min(component, key=lambda x: atoms[x].x)] - right_atom = atoms[max(component, key=lambda x: atoms[x].x)] - - min_x = left_atom.x - shift_x - if len(left_atom.atomic_symbol) == 2: - min_x -= .2 - - max_x = right_atom.x - min_x - min_y = min(atoms[x].y for x in component) - max_y = max(atoms[x].y for x in component) - mean_y = (max_y + min_y) / 2 - shift_y - for n in component: - a = atoms[n] - a._x -= min_x - a._y -= mean_y - - if -.18 <= right_atom.y <= .18: - factor = right_atom.implicit_hydrogens - if factor == 1: - max_x += .15 - elif factor: - max_x += .25 - return max_x - - def _fix_plane_min(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - atoms = self._atoms - if component is None: - component = atoms - - right_atom = atoms[max(component, key=lambda x: atoms[x].x)] - min_x = min(atoms[x].x for x in component) - shift_x - max_x = right_atom.x - min_x - min_y = min(atoms[x].y for x in component) - shift_y - - for n in component: - a = atoms[n] - a._x -= min_x - a._y -= min_y - - if shift_y - .18 <= right_atom.y <= shift_y + .18: - factor = right_atom.implicit_hydrogens - if factor == 1: - max_x += .15 - elif factor: - max_x += .25 - return max_x - - def __clean2d_prepare(self: 'MoleculeContainer', entry): - w = {n: random() for n in self._atoms} - w[entry] = -1 - smiles, order = self._smiles(w.__getitem__, random=True, charges=False, stereo=False, _return_order=True) - return ''.join(smiles).replace('~', '-'), order - - -class Calculate2DReaction: - __slots__ = () - - def clean2d(self: 'ReactionContainer'): - """ - Recalculate 2d coordinates - """ - for m in self.molecules(): - 
m.clean2d() - self.fix_positions() - - def fix_positions(self: 'ReactionContainer'): - """ - Fix coordinates of molecules in reaction - """ - shift_x = 0 - reactants = self.reactants - amount = len(reactants) - 1 - signs = [] - for m in reactants: - max_x = m._fix_plane_mean(shift_x) - if amount: - max_x += .2 - signs.append(max_x) - amount -= 1 - shift_x = max_x + 1 - arrow_min = shift_x - - if self.reagents: - shift_x += .4 - for m in self.reagents: - max_x = m._fix_plane_min(shift_x, .5) - shift_x = max_x + 1 - shift_x += .4 - if shift_x - arrow_min < 3: - shift_x = arrow_min + 3 - else: - shift_x += 3 - arrow_max = shift_x - 1 - - products = self.products - amount = len(products) - 1 - for m in products: - max_x = m._fix_plane_mean(shift_x) - if amount: - max_x += .2 - signs.append(max_x) - amount -= 1 - shift_x = max_x + 1 - self._arrow = (arrow_min, arrow_max) - self._signs = tuple(signs) - self.flush_cache() +from .molecule import * +from .reaction import * __all__ = ['Calculate2DMolecule', 'Calculate2DReaction'] diff --git a/chython/algorithms/calculate2d/molecule.py b/chython/algorithms/calculate2d/molecule.py new file mode 100644 index 00000000..b2500727 --- /dev/null +++ b/chython/algorithms/calculate2d/molecule.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019, 2020 Dinar Batyrshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from math import sqrt +from random import random +from typing import TYPE_CHECKING, Union, Dict +from ...exceptions import ImplementationError +from ...periodictable.base.vector import Vector + + +try: + from importlib.resources import files +except ImportError: # python3.8 + from importlib_resources import files + + +if TYPE_CHECKING: + from chython import MoleculeContainer + +try: + from py_mini_racer.py_mini_racer import MiniRacer, JSEvalException + + ctx = MiniRacer() + ctx.eval('const self = this') + ctx.eval(files(__package__).joinpath('clean2d.js').read_text()) +except RuntimeError: + ctx = None + + +class Calculate2DMolecule: + __slots__ = () + _atoms: Dict[int, 'Element'] + _bonds: Dict[int, Dict[int, 'Bond']] + + def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): + """ + Calculate 2d layout of graph. https://pubs.acs.org/doi/10.1021/acs.jcim.7b00425 JS implementation used. + """ + if ctx is None: + raise ImportError('py_mini_racer is not installed or broken') + plane = {} + entry = iter(sorted(self, key=lambda n: len(self._bonds[n]))) + for _ in range(min(5, len(self))): + smiles, order = self.__clean2d_prepare(next(entry)) + try: + xy = ctx.call('$.clean2d', smiles) + except JSEvalException: + continue + break + else: + raise ImplementationError + + shift_x, shift_y = xy[0] + for n, (x, y) in zip(order, xy): + plane[n] = (x - shift_x, shift_y - y) + + bonds = [] + for n, m, _ in self.bonds(): + xn, yn = plane[n] + xm, ym = plane[m] + bonds.append(sqrt((xm - xn) ** 2 + (ym - yn) ** 2)) + if bonds: + bond_reduce = sum(bonds) / len(bonds) / .825 + else: + bond_reduce = 1. + + atoms = self._atoms + for n, (x, y) in plane.items(): + atoms[n].xy = (x / bond_reduce, y / bond_reduce) + + if self.connected_components_count > 1: + shift_x = 0. 
+ for c in self.connected_components: + shift_x = self._fix_plane_mean(shift_x, component=c) + .9 + self.__dict__.pop('__cached_method__repr_svg_', None) + + def _fix_plane_mean(self, shift_x: float, shift_y=0., component=None) -> float: + atoms = self._atoms + if component is None: + component = atoms + + left_atom = atoms[min(component, key=lambda x: atoms[x].x)] + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] + + min_x = left_atom.x - shift_x + if len(left_atom.atomic_symbol) == 2: + min_x -= .2 + + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) + max_y = max(atoms[x].y for x in component) + mean_y = (max_y + min_y) / 2 - shift_y + delta = Vector(min_x, mean_y) + for n in component: + atoms[n].xy -= delta + + if -.18 <= right_atom.y <= .18: + factor = right_atom.implicit_hydrogens + if factor == 1: + max_x += .15 + elif factor: + max_x += .25 + return max_x + + def _fix_plane_min(self, shift_x: float, shift_y=0., component=None) -> float: + atoms = self._atoms + if component is None: + component = atoms + + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] + min_x = min(atoms[x].x for x in component) - shift_x + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) - shift_y + delta = Vector(min_x, min_y) + for n in component: + atoms[n].xy -= delta + + if shift_y - .18 <= right_atom.y <= shift_y + .18: + factor = right_atom.implicit_hydrogens + if factor == 1: + max_x += .15 + elif factor: + max_x += .25 + return max_x + + def __clean2d_prepare(self: 'MoleculeContainer', entry): + w = {n: random() for n in self._atoms} + w[entry] = -1 + smiles, order = self._smiles(w.__getitem__, random=True, charges=False, stereo=False, _return_order=True) + return ''.join(smiles).replace('~', '-'), order + + +__all__ = ['Calculate2DMolecule'] diff --git a/chython/algorithms/calculate2d/reaction.py b/chython/algorithms/calculate2d/reaction.py new file mode 100644 index 00000000..536643aa --- /dev/null +++ 
b/chython/algorithms/calculate2d/reaction.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/>. +# +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from chython import ReactionContainer + + +class Calculate2DReaction: + __slots__ = () + + def clean2d(self: 'ReactionContainer'): + """ + Recalculate 2d coordinates + """ + for m in self.molecules(): + m.clean2d() + self.fix_positions() + + def fix_positions(self: 'ReactionContainer'): + """ + Fix coordinates of molecules in reaction + """ + shift_x = 0 + reactants = self.reactants + amount = len(reactants) - 1 + signs = [] + for m in reactants: + max_x = m._fix_plane_mean(shift_x) + if amount: + max_x += .2 + signs.append(max_x) + amount -= 1 + shift_x = max_x + 1 + arrow_min = shift_x + + if self.reagents: + shift_x += .4 + for m in self.reagents: + max_x = m._fix_plane_min(shift_x, .5) + shift_x = max_x + 1 + shift_x += .4 + if shift_x - arrow_min < 3: + shift_x = arrow_min + 3 + else: + shift_x += 3 + arrow_max = shift_x - 1 + + products = self.products + amount = len(products) - 1 + for m in products: + max_x = m._fix_plane_mean(shift_x) + if amount: + max_x += .2 + signs.append(max_x) + amount -= 1 + shift_x = max_x + 1 + self._arrow = (arrow_min, arrow_max) + self._signs = tuple(signs) +
self.flush_cache(keep_molecule_cache=True) + + +__all__ = ['Calculate2DReaction'] diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index d4b9c1c2..f7dc58de 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -113,9 +113,9 @@ def rings_graph(self: 'MoleculeContainer'): """ Graph of rings. Linkers are not included. Special bonds are considered. """ - bonds = self.skin_graph + bonds = {n: ms.copy() for n, ms in self.skin_graph.items()} if not bonds: - return bonds + return {} in_rings = set() atoms = set(bonds) diff --git a/chython/containers/_pack_v2.pyx b/chython/containers/_pack_v2.pyx index 6e2a8b19..f216d299 100644 --- a/chython/containers/_pack_v2.pyx +++ b/chython/containers/_pack_v2.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -154,8 +154,8 @@ def pack(object molecule): data[atoms_shift + 3] = isotope << 7 | atomic_number # 1bI , A # 2 float16 big endian - double_to_float16(py_atom._x, &data[atoms_shift + 4]) - double_to_float16(py_atom._y, &data[atoms_shift + 6]) + double_to_float16(py_atom.x, &data[atoms_shift + 4]) + double_to_float16(py_atom.y, &data[atoms_shift + 6]) data[atoms_shift + 8] = hcr atoms_shift += 9 diff --git a/chython/containers/_unpack_v0v2.pyx b/chython/containers/_unpack_v0v2.pyx index 80ab6c59..a4903ad8 100644 --- a/chython/containers/_unpack_v0v2.pyx +++ b/chython/containers/_unpack_v0v2.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: language_level=3 # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -29,6 +29,7 @@ from chython.periodictable import (H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og) +from chython.periodictable.base.vector import Vector # Format specification:: @@ -72,7 +73,7 @@ def unpack(const unsigned char[::1] data not None): cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift cdef unsigned char[4096] seen - cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool + cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool, py_vector cdef dict py_atoms, py_bonds, py_ngb cdef list py_cis_trans @@ -127,10 +128,12 @@ def unpack(const unsigned char[::1] data not None): else: py_atom._isotope = None + py_vector = object.__new__(Vector) a, b = data[atoms_shift + 4], data[atoms_shift + 5] - py_atom._x = double_from_bytes(a, b) + py_vector.x = double_from_bytes(a, b) a, b = data[atoms_shift + 6], data[atoms_shift + 7] - py_atom._y = double_from_bytes(a, b) + py_vector.y = double_from_bytes(a, b) + py_atom._xy = py_vector a = data[atoms_shift + 8] hydrogens = a >> 5 diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index d16d6e40..d8dd30b3 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -19,6 +19,7 @@ from CachedMethods import cached_args_method from collections import Counter, defaultdict from functools import cached_property +from numpy import uint, zeros from typing import Dict, Iterable, List, Optional, Tuple, Union from zlib import compress, decompress from .bonds import Bond, DynamicBond, QueryBond @@ -106,8 +107,6 @@ def adjacency_matrix(self, set_bonds=False, /): :param set_bonds: if True set bond orders instead of 1. """ - from numpy import uint, zeros - adj = zeros((len(self), len(self)), dtype=uint) mapping = {n: x for x, n in enumerate(self._atoms)} if set_bonds: diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 1d066f07..8fdf99c8 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -20,11 +20,12 @@ from CachedMethods import class_cached_property from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Type +from .vector import Vector from ...exceptions import ValenceError class Element(ABC): - __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', + __slots__ = ('_isotope', '_charge', '_is_radical', '_xy', '_implicit_hydrogens', '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring') __class_cache__ = {} @@ -45,7 +46,7 @@ def __init__(self, isotope: Optional[int] = None, *, self.isotope = isotope self.charge = charge self.is_radical = is_radical - self.x, self.y = x, y + self._xy = Vector(x, y) self._implicit_hydrogens = implicit_hydrogens self._stereo = stereo @@ -179,42 +180,33 @@ def x(self) -> float: """ X coordinate of atom on 2D plane """ - return self._x + return self._xy.x @x.setter def x(self, value: float): - if not isinstance(value, float): - raise TypeError('float expected') - self._x = value + self._xy.x = value @property def y(self) -> float: """ Y coordinate of atom on 2D plane """ - return self._y + return self._xy.y @y.setter def y(self, value: float): - if not isinstance(value, float): - raise TypeError('float expected') - self._y = value + self._xy.y = value @property - def xy(self) -> Tuple[float, float]: + def xy(self) -> Vector: """ (X, Y) coordinates of atom on 2D plane """ - return self._x, self._y + return self._xy @xy.setter def xy(self, value: Tuple[float, float]): - if (not isinstance(value, (tuple, list)) - or len(value) != 2 - or not isinstance(value[0], float) - or not isinstance(value[1], float)): - raise TypeError('tuple of 2 floats expected') - self._x, self._y = value + self._xy = Vector(*value) @property def implicit_hydrogens(self) -> Optional[int]: @@ -279,8 +271,7 @@ def copy(self, full=False, 
hydrogens=False, stereo=False) -> 'Element': copy._isotope = self.isotope copy._charge = self.charge copy._is_radical = self.is_radical - copy._x = self.x - copy._y = self.y + copy._xy = self.xy if full: copy._implicit_hydrogens = self.implicit_hydrogens copy._stereo = self.stereo diff --git a/chython/periodictable/base/vector.py b/chython/periodictable/base/vector.py new file mode 100644 index 00000000..c23d2773 --- /dev/null +++ b/chython/periodictable/base/vector.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2024 Denis Lipatov +# Copyright 2024 Vyacheslav Grigorev +# Copyright 2024 Timur Gimadiev +# Copyright 2024, 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see <https://www.gnu.org/licenses/>.
+# +from math import cos, sin, hypot, atan2 + + +class Vector: + __slots__ = ('x', 'y') + + def __init__(self, x: float = 0., y: float = 0.): + self.x = x + self.y = y + + def __repr__(self): + return f'Vector({self.x}, {self.y})' + + def __neg__(self): + """ + A class method that inverts the current coordinates of objects of the class + """ + return Vector(-self.x, -self.y) + + def __sub__(self, vector: 'Vector'): + """ + A method for the operation of subtraction between vectors + """ + return Vector(self.x - vector.x, self.y - vector.y) + + def __add__(self, vector: 'Vector'): + """ + A method for the operation of addition between vectors + """ + return Vector(self.x + vector.x, self.y + vector.y) + + def __truediv__(self, scalar: float): + """ + A class method that divides the coordinates of the vector by a given scalar + """ + return Vector(self.x / scalar, self.y / scalar) + + def __mul__(self, scalar: float): + """ + Multiplies the coordinates of the current vector by an arbitrary real number + """ + return Vector(self.x * scalar, self.y * scalar) + + def __float__(self): + """ + Calculates the length of the current vector + + Returns float + """ + return hypot(self.x, self.y) + + def __iter__(self): + yield self.x + yield self.y + + def __len__(self): + return 2 + + def __matmul__(self, vector: 'Vector'): + return self.x * vector.y - self.y * vector.x + + def __or__(self, vector: 'Vector'): + """ + Calculate distance between two vectors + """ + return hypot(vector.x - self.x, vector.y - self.y) + + def rotate(self, angle: float, vector: 'Vector' = None): + """ + A method that rotates the vector by the angle in radians + """ + c = cos(angle) + s = sin(angle) + if vector is None: + return Vector(self.x * c - self.y * s, self.x * s + self.y * c) + xy = self - vector + return vector + Vector(xy.x * c - xy.y * s, xy.x * s + xy.y * c) + + def normalise(self): + """ + Normalization of coordinates (dividing them by the length of the vector itself) + """ + if ln := 
float(self): + return self / ln + return self + + def angle(self, vector: 'Vector' = None) -> float: + """ + A method calculates the angle of inclination of the current vector + or the vector between given vector and the current vector. + """ + if vector is None: + return atan2(self.y, self.x) + else: + return atan2(vector.y - self.y, vector.x - self.x) + + +__all__ = ['Vector'] diff --git a/chython/utils/grid.py b/chython/utils/grid.py index 1a771718..01b5dd90 100644 --- a/chython/utils/grid.py +++ b/chython/utils/grid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # @@ -88,9 +88,8 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] # restore planes for m, p in zip(molecules, planes): - for (_, a), (x, y) in zip(m.atoms(), p): - a.x = x - a.y = y + for (_, a), xy in zip(m.atoms(), p): + a.xy = xy _width = shift_x - 1.5 * font_size _height = -shift_y - 1.5 * font_size diff --git a/chython/utils/retro.py b/chython/utils/retro.py index 8fa1aaec..cd84b184 100644 --- a/chython/utils/retro.py +++ b/chython/utils/retro.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # Copyright 2021 Alexander Sizov # This file is part of chython. # @@ -91,9 +91,8 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle y_shift -= h + y_gap render.append(m.depict(_embedding=True)[:5]) - for (_, a), (x, y) in zip(m.atoms(), plane): # restore - a.x = x - a.y = y + for (_, a), xy in zip(m.atoms(), plane): # restore + a.xy = xy x_shift = c_max_x + x_gap # between columns gap last_layer = current_layer From 2d6cbc44328a02a5d2d135bccd09dc8c784bfe36 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 26 Jan 2025 17:06:42 +0100 Subject: [PATCH 55/68] dropped pack v3.
required extensive refactoring stereo isomorphism for molecule to molecule implemented --- build.py | 5 +- chython/algorithms/isomorphism.py | 38 ++++- chython/containers/_unpack_v3.pyx | 211 ---------------------------- chython/containers/bonds.py | 12 +- chython/containers/molecule.py | 165 +--------------------- chython/periodictable/base/query.py | 10 +- 6 files changed, 52 insertions(+), 389 deletions(-) delete mode 100644 chython/containers/_unpack_v3.pyx diff --git a/build.py b/build.py index 7f484611..6f97641d 100644 --- a/build.py +++ b/build.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023, 2024 Ramil Nugmanov +# Copyright 2023-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -54,9 +54,6 @@ Extension('chython.containers._unpack_v0v2', ['chython/containers/_unpack_v0v2.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._unpack_v3', - ['chython/containers/_unpack_v3.pyx'], - extra_compile_args=extra_compile_args), Extension('chython.files._xyz', ['chython/files/_xyz.pyx'], extra_compile_args=extra_compile_args) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 1dfdfe20..f53beca5 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -24,7 +24,7 @@ from struct import Struct from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product -from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement +from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement, ExtendedQuery if TYPE_CHECKING: @@ -153,20 +153,50 @@ def get_automorphism_mapping(self: 'MoleculeContainer') -> Iterator[Dict[int, in """ Iterator of all possible automorphism mappings. 
""" - return _get_automorphism_mapping(self.atoms_order, self._bonds) + return _get_automorphism_mapping(self._chiral_morgan, self._bonds) def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, - searching_scope: Optional[Collection[int]] = None): + searching_scope: Optional[Collection[int]] = None, match_stereo: bool = False): """ Get self to other Molecule substructure mapping generator. :param other: Molecule :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. + :param match_stereo: test stereo labels matches. slow algorithm, thus disabled by default. """ if not isinstance(other, MoleculeIsomorphism): raise TypeError('MoleculeContainer expected') - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) + + for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter or match_stereo, + searching_scope=searching_scope): + if match_stereo: + sub = other.substructure(mapping.values()) # extract matched subgraph + fm = self.get_fast_mapping(sub) + if not fm: # check mor matching with stereo labels too + continue + yield fm + if not automorphism_filter: + for auto in sub.get_automorphism_mapping(): # enumerate all possible automorphisms + yield {n: auto[m] for n, m in fm.items()} + else: + yield mapping + + def get_fast_mapping(self, other: 'MoleculeContainer') -> Optional[Dict[int, int]]: + """ + Get self to other fast (suboptimal) structure mapping. + Only one possible atoms mapping returned. + Effective only for big molecules. 
+ """ + if isinstance(other, MoleculeIsomorphism): + if len(self) != len(other): + return + so = self.smiles_atoms_order + oo = other.smiles_atoms_order + if self != other: + return + return dict(zip(so, oo)) + raise TypeError('MoleculeContainer expected') @cached_property def _cython_compiled_structure(self: 'MoleculeContainer'): diff --git a/chython/containers/_unpack_v3.pyx b/chython/containers/_unpack_v3.pyx deleted file mode 100644 index 9d4be647..00000000 --- a/chython/containers/_unpack_v3.pyx +++ /dev/null @@ -1,211 +0,0 @@ -# -*- coding: utf-8 -*- -# cython: language_level=3 -# -# Copyright 2023 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -cimport cython -from cpython.mem cimport PyMem_Malloc, PyMem_Free - -from chython.containers.bonds import Bond - -# Format specification:: -# -# Big endian bytes order -# 8 bit - 0x03 (format specification version) -# Atom block 3 bytes (repeated): -# 1 bit - atom entrance flag (always 1) -# 7 bit - atomic number (<=118) -# 3 bit - hydrogens (0-7). Note: 7 == None -# 4 bit - charge (charge + 4. 
possible range -4 - 4) -# 1 bit - radical state -# 1 bit padding -# 3 bit tetrahedron/allene sign -# (000 - not stereo or unknown, 001 - pure-unknown-enantiomer, 010 or 011 - has stereo) -# 4 bit - number of following bonds and CT blocks (0-15) -# -# Bond block 2 bytes (repeated 0-15 times) -# 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) -# 4 bit - bond order: 0000 - single, 0001 - double, 0010 - triple, 0011 - aromatic, 0111 - special -# -# Cis-Trans 2 bytes -# 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) -# 4 bit - CT sign: 1000 or 1001 - to avoid overlap with bond - -@cython.nonecheck(False) -@cython.boundscheck(False) -@cython.cdivision(True) -@cython.wraparound(False) -def unpack(const unsigned char[::1] data not None): - cdef char *charges - cdef unsigned char *atoms, *hydrogens, *radicals, *is_chiral, *neighbors, **orders, *seen - cdef unsigned short **connections, *ct_stereo - cdef bint *stereo_sign, *ct_sign - - cdef unsigned char a, b, i - cdef unsigned short size, shift = 1, n, m, bond_shift, atoms_count, ct_count = 0, ct_shift = 0 - - cdef tuple py_xy - cdef object bond, py_n, py_m - cdef list py_mapping, py_atoms, py_isotopes, py_bonds_flat - cdef dict py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, py_ngb - cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo - - # allocate memory - size = len(data) - atoms = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - charges = PyMem_Malloc(size / 3 * sizeof(char)) - radicals = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - hydrogens = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - is_chiral = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - stereo_sign = PyMem_Malloc(size / 3 * sizeof(bint)) - ct_stereo = PyMem_Malloc(size / 3 * sizeof(unsigned short)) - ct_sign = PyMem_Malloc(size / 6 * sizeof(bint)) - seen = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - neighbors = 
PyMem_Malloc(size / 3 * sizeof(unsigned char)) - connections = PyMem_Malloc(size / 3 * sizeof(unsigned short*)) - orders = PyMem_Malloc(size / 3 * sizeof(unsigned char *)) - for n in range(size / 3): - connections[n] = PyMem_Malloc(15 * sizeof(unsigned short)) - orders[n] = PyMem_Malloc(15 * sizeof(unsigned char)) - - # unpack atom block to separate attributes arrays - n = 0 - while shift < size: - seen[n] = 0 # erase randomness - a = data[shift] - if a & 0x80 == 0: # end of pack - break - atoms[n] = a & 0x7f - - a = data[shift + 1] - hydrogens[n] = a >> 5 - charges[n] = ((a >> 1) & 0x0f) - 4 - radicals[n] = a & 0x01 - - a = data[shift + 2] - bond_shift = a & 0x0f - b = a >> 4 - if b == 0b0011: - is_chiral[n] = 1 - stereo_sign[n] = True - elif b == 0b0010: - is_chiral[n] = 1 - stereo_sign[n] = False - else: - is_chiral[n] = 0 - - shift += 3 - neighbors[n] = 0 - for i in range(bond_shift): - a, b = data[shift], data[shift + 1] - shift += 2 - - m = n - (a << 4 | b >> 4) # second atom index - b &= 0x0f - - if b < 8: - connections[n][neighbors[n]] = m - connections[m][neighbors[m]] = n - orders[m][neighbors[m]] = b + 1 # only single direction - neighbors[n] += 1 - neighbors[m] += 1 - else: # CT stereo - ct_stereo[ct_shift] = m + 1 - ct_stereo[ct_shift + 1] = n + 1 - ct_sign[ct_count] = b & 0x01 - ct_count += 1 - ct_shift += 2 - n += 1 - atoms_count = n - - # define returned data - py_mapping = [] - py_atoms = [] - py_isotopes = [] - py_charges = {} - py_radicals = {} - py_hydrogens = {} - py_plane = {} - py_atoms_stereo = {} - py_allenes_stereo = {} - py_cis_trans_stereo = {} - py_bonds = {} - py_bonds_flat = [] - py_xy = (0., 0.) 
- - for n in range(atoms_count): - seen[n] = 1 - py_n = n + 1 # shared py int obj - - # fill intermediate data - py_mapping.append(py_n) - py_atoms.append(atoms[n]) - py_isotopes.append(None) - - py_charges[py_n] = charges[n] - py_radicals[py_n] = bool(radicals[n]) - if hydrogens[n] == 7: - py_hydrogens[py_n] = None - else: - py_hydrogens[py_n] = hydrogens[n] - - py_plane[py_n] = py_xy - - if is_chiral[n]: - if neighbors[n] == 2: # allene - py_allenes_stereo[py_n] = stereo_sign[n] - else: - py_atoms_stereo[py_n] = stereo_sign[n] - - py_bonds[py_n] = py_ngb = {} - for i in range(neighbors[n]): - m = connections[n][i] - py_m = m + 1 - if seen[m]: # bond partially exists. need back-connection. - py_ngb[py_m] = py_bonds[py_m][py_n] - else: - bond = object.__new__(Bond) - bond._Bond__order = orders[n][i] - bond._Bond__n = py_n - bond._Bond__m = py_m - py_ngb[py_m] = bond - py_bonds_flat.append(bond) - - ct_shift = 0 - for n in range(ct_count): - py_cis_trans_stereo[(ct_stereo[ct_shift], ct_stereo[ct_shift + 1])] = ct_sign[n] - ct_shift += 2 - - PyMem_Free(atoms) - PyMem_Free(charges) - PyMem_Free(radicals) - PyMem_Free(hydrogens) - PyMem_Free(is_chiral) - PyMem_Free(stereo_sign) - PyMem_Free(ct_stereo) - PyMem_Free(ct_sign) - PyMem_Free(neighbors) - PyMem_Free(seen) - for n in range(size / 3): - PyMem_Free(connections[n]) - PyMem_Free(orders[n]) - PyMem_Free(connections) - PyMem_Free(orders) - - return (py_mapping, py_atoms, py_isotopes, - py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, - py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo, shift, py_bonds_flat) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 76e408ce..727b15d7 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -171,11 +171,9 @@ def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], raise TypeError('invalid order value') if in_ring is not None and not isinstance(in_ring, bool): raise TypeError('in_ring mark should be boolean or None') - if stereo is not None and not isinstance(stereo, bool): - raise TypeError('stereo mark should be boolean or None') self._order = order self._in_ring = in_ring - self._stereo = stereo + self.stereo = stereo def __eq__(self, other): if isinstance(other, Bond): @@ -218,6 +216,12 @@ def in_ring(self) -> Optional[bool]: def stereo(self) -> Optional[bool]: return self._stereo + @stereo.setter + def stereo(self, value): + if value is not None and not isinstance(value, bool): + raise TypeError('stereo mark should be boolean or None') + self._stereo = value + def copy(self, full=False) -> 'QueryBond': copy = object.__new__(self.__class__) copy._order = self.order diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index d8dd30b3..ddc6abb7 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -419,22 +419,6 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': hb[n][m] = hb[m][n] = bond return h - def get_fast_mapping(self, other: 'MoleculeContainer') -> Optional[Dict[int, int]]: - """ - Get self to other fast (suboptimal) structure mapping. - Only one possible atoms mapping returned. - Effective only for big molecules. - """ - if isinstance(other, MoleculeContainer): - if len(self) != len(other): - return - so = self.smiles_atoms_order - oo = other.smiles_atoms_order - if self != other: - return - return dict(zip(so, oo)) - raise TypeError('MoleculeContainer expected') - def pack(self, *, compressed=True, check=True, version=2, order: List[int] = None) -> bytes: """ Pack into compressed bytes. 
@@ -475,7 +459,7 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non :param compressed: return zlib-compressed pack. :param check: check molecule for format restrictions. - :param version: format version + :param version: format version. Only V2 is supported. :param order: atom order in V3 """ from ._pack_v2 import pack as pack_v2 @@ -491,8 +475,6 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non if version == 2: data = pack_v2(self) - elif version == 3: - data = self._pack_v3(order, check) else: raise ValueError('invalid specification version') if compressed: @@ -522,7 +504,6 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labe :param compressed: decompress data before processing. """ from ._unpack_v0v2 import unpack as unpack_v0v2 - from ._unpack_v3 import unpack as unpack_v3 if compressed: data = decompress(data) @@ -530,8 +511,6 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labe mol, cis_trans, pack_length = unpack_v0v2(data) for n, m, s in cis_trans: mol.bond(*mol._stereo_cis_trans_centers[n])._stereo = s - elif data[0] == 3: - mol, cis_trans, pack_length = unpack_v3(data) else: raise ValueError('invalid pack header') @@ -552,148 +531,6 @@ def unpach(cls, data: Union[bytes, memoryview], /, *, compressed=True) -> 'Molec def __bytes__(self): return self.pack() - def _pack_v3(self, order=None, check=True): - """ - Format V3 specification: - Big endian bytes order - 8 bit - 0x03 (format specification version) - Atom block 3 bytes (repeated): - 1 bit - atom entrance flag (always 1) - 7 bit - atomic number (<=118) - 3 bit - hydrogens (0-7). Note: 7 == None - 4 bit - charge (charge + 4. possible range -4 - 4) - 1 bit - radical state - 4 bit - atom stereo - ANDx and ORx encode only sign. X value stored in the same order in Stereo group block. 
- 0000 [same as V2] - no stereo or unknown - 0001 - not used - 0010 - absolute sign False - 0011 - absolute sign True - 0100 - sign False OR1 group - 0101 - sign True OR1 group - 0110 - sign False AND1 group - 0111 - sign True AND1 group - 1000 - sign False OR2 group - 1001 - sign True OR2 group - 1010 - sign False AND2 group - 1011 - sign True AND2 group - 1100 - sign False ORx group - 1101 - sign True ORx group - 1110 - sign False ANDx group - 1111 - sign True ANDx group - 4 bit - number of following bonds and CT blocks (0-15) - - Bond block 2 bytes (repeated 0-15 times) - 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) - 4 bit - bond order: 0000 - single, 0001 - double, 0010 - triple, 0011 - aromatic, 0111 - special - - Cis-Trans 2 bytes - 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) - 4 bit - CT sign: 1000 or 1001 - to avoid overlap with bond - """ - if order is None: - order = list(self._atoms) - elif check: - if not isinstance(order, (list, tuple)): - raise TypeError('invalid atoms order') - elif len(so := set(order)) != len(order) or not so.issubset(self._atoms): - raise ValueError('invalid atoms order') - - atoms = self._atoms - bonds = self._bonds - allenes_terminals = self._stereo_allenes_terminals - - cumulenes = {} - ct_map = {} - for n, m in self._cis_trans_stereo: - ct_map[n] = m - ct_map[m] = n - cumulenes[n] = [x for x, b in bonds[n].items() if b.order in (1, 4)] - cumulenes[m] = [x for x, b in bonds[m].items() if b.order in (1, 4)] - - for c in self._allenes_stereo: - n, m = allenes_terminals[c] - cumulenes[n] = [x for x, b in bonds[n].items() if b.order in (1, 4)] - cumulenes[m] = [x for x, b in bonds[m].items() if b.order in (1, 4)] - - seen = {} - data = [b'\x03'] - for i, n in enumerate(order): - seen[n] = i - atom = atoms[n] - env = bonds[n] - - data.append((0x80 | atom.atomic_number).to_bytes(1, 'big')) - - # 3 bit - hydrogens (0-6, 
None) | 4 bit - charge | 1 bit - radical - hcr = (atom.charge + 4) << 1 | atom.is_radical - if atom.implicit_hydrogens is None: - hcr |= 0b11100000 - else: - hcr |= atom.implicit_hydrogens << 5 - data.append(hcr.to_bytes(1, 'big')) - - if atom.stereo is not None: - if len(env) == 2: - t1, t2 = allenes_terminals[n] - nn = None - for x in order: - if nn is None: - if x in cumulenes[t1]: - nn = x - flag = True - elif x in cumulenes[t2]: - flag = False - nn = x - elif flag: # noqa - if x in cumulenes[t2]: - nm = x - break - elif x in cumulenes[t1]: - nm = x - break - if self._translate_allene_sign(n, nn, nm): # noqa - s = 0b0011_0000 - else: - s = 0b0010_0000 - elif self._translate_tetrahedron_sign(n, [x for x in order if x in env]): - s = 0b0011_0000 - else: - s = 0b0010_0000 - else: - s = 0 - - tmp = [] - for m in order[:i]: - if (b := env.get(m)) is not None: - tmp.append(((i - seen[m]) << 4 | b.order - 1).to_bytes(2, 'big')) - if n in ct_map and (m := ct_map[n]) in seen: # only right atom codes stereo sign - nm = None - for x in order: - if nm is None: - if x in cumulenes[n]: - nm = x - flag = True - elif x in cumulenes[m]: - nm = x - flag = False - elif flag: # noqa - if x in cumulenes[m]: - nn = x - break - elif x in cumulenes[n]: - nn = x - break - if self._translate_cis_trans_sign(m, n, nm, nn): # noqa - cs = 0b1001 - else: - cs = 0b1000 - tmp.append(((i - seen[m]) << 4 | cs).to_bytes(2, 'big')) - - data.append((s | len(tmp)).to_bytes(1, 'big')) - data.extend(tmp) - return b''.join(data) - def _augmented_substructure(self, atoms: Iterable[int], deep: int): atoms = set(atoms) bonds = self._bonds diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index c955da28..def97785 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2020-2025 Ramil Nugmanov # Copyright 2021 Dmitrij Zanadvornykh # This file is 
part of chython. # @@ -125,7 +125,7 @@ def __init__(self, charge: int = 0, is_radical: bool = False, heteroatoms: Union self.heteroatoms = heteroatoms self.ring_sizes = ring_sizes self.implicit_hydrogens = implicit_hydrogens - self._stereo = stereo + self.stereo = stereo @property def charge(self) -> int: @@ -201,6 +201,12 @@ def ring_sizes(self, value): def stereo(self): return self._stereo + @stereo.setter + def stereo(self, value: Optional[bool]): + if value is not None and not isinstance(value, bool): + raise TypeError('stereo should be bool') + self._stereo = value + def copy(self, full=False): copy = super().copy(full=full) copy._charge = self.charge From a8597837d6b3a32103ccf55d7917c5e7e2d489c0 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 26 Jan 2025 20:19:16 +0100 Subject: [PATCH 56/68] query stereo isomorphism implemented. smarts atom stereo parsing implemented. single @ always treated as True label, thus, violates smiles stereo rules. simple cis-trans parsing supported now. smarts generation is broken now. 
--- chython/algorithms/isomorphism.py | 98 ++++++++++++------------------ chython/algorithms/smiles.py | 8 +-- chython/files/daylight/smarts.py | 16 ++++- chython/files/daylight/tokenize.py | 3 +- 4 files changed, 60 insertions(+), 65 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index f53beca5..914a68d9 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -338,70 +338,52 @@ def get_mapping(query, scope): array('I', [n in scope for n in other])) else: components = get_mapping = None - yield from self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, - components=components, get_mapping=get_mapping) - return - # todo: implement stereo - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo - cis_trans_stereo = self._cis_trans_stereo - - other_atoms_stereo = other._atoms_stereo - other_allenes_stereo = other._allenes_stereo - other_cis_trans_stereo = other._cis_trans_stereo - other_translate_tetrahedron_sign = other._translate_tetrahedron_sign - other_translate_allene_sign = other._translate_allene_sign - other_translate_cis_trans_sign = other._translate_cis_trans_sign - - tetrahedrons = self.stereogenic_tetrahedrons - cis_trans = self.stereogenic_cis_trans - allenes = self.stereogenic_allenes - - oatoms = other._atoms - - for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, - searching_scope=searching_scope): + + for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, + components=components, get_mapping=get_mapping): + reverse = None + # test stereo labels matches for n, a in self.atoms(): - if a.stereo is None: - continue + if not isinstance(a, ExtendedQuery) or a.stereo is None: + continue # non-chiral atom matches any atom. 
no need for checks m = mapping[n] - oa = oatoms[m] - if oa.stereo is None: # stereo in query should match only stereo atom - break - other._translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) - for n, s in atoms_stereo.items(): - m = mapping[n] - if m not in other_atoms_stereo: # self stereo atom not stereo in other - break - # translate stereo mark in other in order of self tetrahedron - if other_translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) != s: - break + if other.atom(m).stereo is None: # stereo in query should match only stereo atom + break # reject mapping + + if m in other.stereogenic_tetrahedrons: + if other._translate_tetrahedron_sign(m, [mapping[x] for x in self._bonds[n]]) != a.stereo: + break # stereo sign doesn't match + else: # allene case + if reverse is None: + reverse = {m: n for n, m in mapping.items()} + ot1, ot2 = other._stereo_allenes_terminals[m] # get terminal atoms + on1, om1, on2, om2 = other.stereogenic_allenes[m] # get neighbors + t1, t2 = reverse[ot1], reverse[ot2] + env = (reverse.get(on1), reverse.get(om1), reverse.get(on2), reverse.get(om2)) + n1 = mapping[next(x for x in self._bonds[t1] if x in env)] + m1 = mapping[next(x for x in self._bonds[t2] if x in env)] + if other._translate_allene_sign(m, n1, m1) != a.stereo: + break else: - for n, s in allenes_stereo.items(): - m = mapping[n] - if m not in other_allenes_stereo: # self stereo allene not stereo in other + for n, m, b in self.bonds(): + if b.stereo is None: + continue + on, om = mapping[n], mapping[m] + if other.bond(on, om).stereo is None: # chiral query bond matches only chiral molecule bond break - # translate stereo mark in other in order of self allene - nn, nm, *_ = allenes[n] - if other_translate_allene_sign(m, mapping[nn], mapping[nm]) != s: + if reverse is None: + reverse = {m: n for n, m in mapping.items()} + + ot1, ot2 = ots = other._stereo_cis_trans_terminals[on] # get terminal atoms + on1, om1, on2, om2 = 
other.stereogenic_cis_trans[ots] # get neighbors + t1, t2 = reverse[ot1], reverse[ot2] + env = (reverse.get(on1), reverse.get(om1), reverse.get(on2), reverse.get(om2)) + n1 = mapping[next(x for x in self._bonds[t1] if x in env)] + m1 = mapping[next(x for x in self._bonds[t2] if x in env)] + if other._translate_cis_trans_sign(ot1, ot2, n1, m1) != b.stereo: break else: - for nm, s in cis_trans_stereo.items(): - n, m = nm - on, om = mapping[n], mapping[m] - if (on, om) not in other_cis_trans_stereo: - if (om, on) not in other_cis_trans_stereo: - break # self stereo cis_trans not stereo in other - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(om, on, mapping[nm], mapping[nn]) != s: - break - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(on, om, mapping[nn], mapping[nm]) != s: - break - else: - yield mapping + yield mapping @cached_property def _cython_compiled_query(self): diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 5f463035..e6959afb 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # Copyright 2019 Timur Gimadiev # This file is part of chython. # @@ -592,7 +592,7 @@ def _format_atom(self: 'QueryContainer', n, adjacency, **kwargs): if isinstance(atom, ExtendedQuery): if atom.stereo is not None: # mark atom as chiral. it's too difficult to set correct sign - smi.append(';@?') + smi.append(';@' if atom.stereo else ';@@') if atom.charge: smi.append(';') @@ -623,8 +623,8 @@ def _format_bond(self: 'QueryContainer', n, m, adjacency, **kwargs): # bond chirality skipped. too difficult to implement. 
b = self._bonds[n][m] s = ','.join(order_str[x] for x in b.order) - if (c := b.in_ring) is not None: - s += ';@' if c else ';!@' + if b.in_ring is not None: + s += ';@' if b.in_ring else ';!@' return s diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 3a409505..42adb6d8 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,7 @@ from re import compile, findall, search from .parser import parser from .tokenize import smarts_tokenize -from ...containers import QueryContainer +from ...containers import QueryContainer, QueryBond from ...periodictable import ListElement, QueryElement @@ -102,6 +102,10 @@ def smarts(data: str): for i in msk: data['atoms'][i]['masked'] = True + for i, s in data['stereo_atoms'].items(): + data['atoms'][i]['stereo'] = s + stereo_bonds = data['stereo_bonds'] + g = QueryContainer() mapping = {} @@ -118,6 +122,14 @@ def smarts(data: str): g.add_atom(e(**a), n) for n, m, b in data['bonds']: + if n in stereo_bonds and m in stereo_bonds: + if m not in stereo_bonds[n]: # only simple cis-trans supported, not cumulenes + _, s1 = stereo_bonds[n].popitem() + _, s2 = stereo_bonds[m].popitem() + if isinstance(b, int): + b = QueryBond(b, stereo=s1 == s2) + else: + b.stereo = s1 == s2 g.add_bond(mapping[n], mapping[m], b) return g diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index e8f3c7e6..fe626b9f 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -297,6 +297,7 @@ def _query_parse(token): if stereo := search(str_re, token): # drop stereo mark. unsupported token = token[:stereo.start()] + token[stereo.end():] + out['stereo'] = stereo.group() == '@' # supported only <;> and <,> logic. <&> and silent <&> not supported! primitives = token.split(';') From 8ea65e270a83862cab01a1d33ccfe414ee9a130b Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 26 Jan 2025 20:52:38 +0100 Subject: [PATCH 57/68] dropped query from molecule generation. dropped smarts generation. smarts notation complexity reduced. --- chython/algorithms/smiles.py | 79 +----------------------------- chython/containers/molecule.py | 54 ++------------------ chython/containers/query.py | 17 +++++-- chython/containers/reaction.py | 4 +- chython/files/daylight/smarts.py | 60 ++++++----------------- chython/utils/__init__.py | 5 +- chython/utils/functional_groups.py | 53 -------------------- 7 files changed, 38 insertions(+), 234 deletions(-) delete mode 100644 chython/utils/functional_groups.py diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index e6959afb..22bc5e6b 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -25,17 +25,15 @@ from itertools import product from random import random from typing import Callable, Optional, Tuple, TYPE_CHECKING, Union -from ..periodictable import ExtendedQuery, QueryElement if TYPE_CHECKING: - from chython import MoleculeContainer, CGRContainer, QueryContainer + from chython import MoleculeContainer, CGRContainer from chython.containers.graph import Graph charge_str = {-4: '-4', -3: '-3', -2: '-2', -1: '-', 0: '0', 1: '+', 2: '+2', 3: '+3', 4: '+4'} order_str = {1: '-', 2: '=', 3: '#', 4: ':', 8: '~', None: '.'} organic_set = {'C', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'B'} -hybridization_str = {4: '4', 3: '1', 2: '2', 1: '3', None: 'n'} dyn_order_str = {(None, 1): '[.>-]', (None, 2): '[.>=]', 
(None, 3): '[.>#]', (None, 4): '[.>:]', (None, 8): '[.>~]', (1, None): '[->.]', (1, 1): '', (1, 2): '[->=]', (1, 3): '[->#]', (1, 4): '[->:]', (1, 8): '[->~]', (2, None): '[=>.]', (2, 1): '[=>-]', (2, 2): '=', (2, 3): '[=>#]', (2, 4): '[=>:]', (2, 8): '[=>~]', @@ -555,77 +553,4 @@ def _format_bond(self: 'CGRContainer', n, m, adjacency, **kwargs): return dyn_order_str[(bond.order, bond.p_order)] -class QuerySmiles(Smiles): - __slots__ = () - - def _smiles_order(self: 'QueryContainer', stereo=True): - # try to keep atoms order - return {n: i for i, n in enumerate(self._atoms)}.__getitem__ - - def _format_cxsmiles(self: 'QueryContainer', order): - hh = ['atomProp'] - cx = [] - rad = [str(n) for n, m in enumerate(order) if isinstance(a:=self._atoms[m], ExtendedQuery) and a.is_radical] - if rad: - cx.append('^1:' + ','.join(rad)) - - for n, m in enumerate(order): - atom = self._atoms[m] - if len(hb := atom.hybridization) > 1 or (hb and hb[0] != 4): - hh.append(f'{n}.hyb.' + ''.join(hybridization_str[x] for x in hb)) - if isinstance(atom, ExtendedQuery) and (ha := atom.heteroatoms): - hh.append(f'{n}.het.' + ''.join(str(x) for x in ha)) - if atom.masked: - hh.append(f'{n}.msk.1') - if len(hh) > 1: - cx.append(':'.join(hh)) - if cx: - return f'|{",".join(cx)}|' - - def _format_atom(self: 'QueryContainer', n, adjacency, **kwargs): - atom = self._atoms[n] - if isinstance(atom, QueryElement) and atom.isotope: - smi = ['[', str(atom.isotope), atom.atomic_symbol] - else: - smi = ['[', atom.atomic_symbol] - - if isinstance(atom, ExtendedQuery): - if atom.stereo is not None: - # mark atom as chiral. 
it's too difficult to set correct sign - smi.append(';@' if atom.stereo else ';@@') - - if atom.charge: - smi.append(';') - smi.append(charge_str[atom.charge]) - - if atom.implicit_hydrogens: # h implicit-H-count implicit hydrogens - smi.append(';') - smi.append(','.join(f'h{x}' for x in atom.implicit_hydrogens)) - - if atom.neighbors: # D degree explicit connections - smi.append(';') - smi.append(','.join(f'D{x}' for x in atom.neighbors)) - - if isinstance(atom, ExtendedQuery) and atom.ring_sizes: - smi.append(';') - if atom.ring_sizes[0]: - smi.append(','.join(f'r{x}' for x in atom.ring_sizes)) - else: - smi.append('!R') - - if len(atom.hybridization) == 1 and atom.hybridization[0] == 4: # only aromatic. other marks in cx extension - smi.append(';a') - - smi.append(']') - return ''.join(smi) - - def _format_bond(self: 'QueryContainer', n, m, adjacency, **kwargs): - # bond chirality skipped. too difficult to implement. - b = self._bonds[n][m] - s = ','.join(order_str[x] for x in b.order) - if b.in_ring is not None: - s += ';@' if b.in_ring else ';!@' - return s - - -__all__ = ['MoleculeSmiles', 'CGRSmiles', 'QuerySmiles'] +__all__ = ['MoleculeSmiles', 'CGRSmiles'] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index ddc6abb7..b1201b49 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -20,12 +20,11 @@ from collections import Counter, defaultdict from functools import cached_property from numpy import uint, zeros -from typing import Dict, Iterable, List, Optional, Tuple, Union +from typing import Dict, Iterable, List, Tuple, Union from zlib import compress, decompress -from .bonds import Bond, DynamicBond, QueryBond +from .bonds import Bond, DynamicBond from .cgr import CGRContainer from .graph import Graph -from .query import QueryContainer from ..algorithms.aromatics import Aromatize from ..algorithms.calculate2d import Calculate2DMolecule from ..algorithms.depict import DepictMolecule @@ -40,7 +39,7 
@@ from ..algorithms.tautomers import Tautomers from ..algorithms.x3dom import X3domMolecule from ..exceptions import ValenceError -from ..periodictable import DynamicElement, Element, QueryElement, H as _H +from ..periodictable import DynamicElement, Element, H as _H # atomic number constants @@ -262,11 +261,7 @@ def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = raise TypeError('MoleculeContainer expected') return super().union(other, remap=remap, copy=copy) - def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalculate_hydrogens=True, - skip_neighbors_marks=False, skip_hybridizations_marks=False, skip_hydrogens_marks=False, - skip_rings_sizes_marks=False, skip_heteroatoms_marks=False, skip_in_ring_bond_marks=False, - skip_stereo_marks=False) -> \ - Union['MoleculeContainer', 'QueryContainer']: + def substructure(self, atoms: Iterable[int], *, recalculate_hydrogens=True) -> 'MoleculeContainer': """ Create substructure containing atoms from atoms list. @@ -275,54 +270,13 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul Call `kekule()` and `thiele()` in sequence to fix marks. 
:param atoms: list of atoms numbers of substructure - :param as_query: return Query object based on graph substructure :param recalculate_hydrogens: calculate implicit H count in substructure - :param skip_neighbors_marks: Don't set neighbors count marks on substructured queries - :param skip_hybridizations_marks: Don't set hybridizations marks on substructured queries - :param skip_hydrogens_marks: Don't set hydrogens count marks on substructured queries - :param skip_rings_sizes_marks: Don't set rings_sizes marks on substructured queries - :param skip_heteroatoms_marks: Don't set heteroatoms count marks - :param skip_in_ring_bond_marks: Don't set in_ring bond marks - :param skip_stereo_marks: Don't set stereo marks on substructured queries """ if not atoms: raise ValueError('empty atoms list not allowed') if set(atoms) - self._atoms.keys(): raise ValueError('invalid atom numbers') atoms = tuple(n for n in self if n in atoms) # save original order - if as_query: - sub = object.__new__(QueryContainer) - - lost = {n for n, a in self.atoms() if a != H} - set(atoms) # atoms not in substructure - # atoms with fully present neighbors - not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} - - # check for full presence of cumulene chains and terminal attachments - for p in self.stereogenic_cumulenes.values(): - if not not_skin.issuperset(p): - not_skin.difference_update(p) - - sub._atoms = {n: QueryElement.from_atom(self._atoms[n], - neighbors=not skip_neighbors_marks, - hybridization=not skip_hybridizations_marks, - hydrogens=not skip_hydrogens_marks, - ring_sizes=not skip_rings_sizes_marks, - heteroatoms=not skip_heteroatoms_marks, - stereo=not skip_stereo_marks and n in not_skin) - for n in atoms} - sub._bonds = sb = {} - for n in atoms: - sb[n] = sbn = {} - for m, bond in self._bonds[n].items(): - if m in sb: # bond partially exists. need back-connection. 
- sbn[m] = sb[m][n] - elif m in atoms: - sbn[m] = QueryBond.from_bond(bond, - in_ring=not skip_in_ring_bond_marks, - stereo=not skip_stereo_marks and n in not_skin and m in not_skin) - return sub - - # molecule substructure sub = object.__new__(self.__class__) sub._name = sub._meta = sub._changed = None sub._atoms = {n: self._atoms[n].copy(hydrogens=not recalculate_hydrogens, stereo=True) for n in atoms} diff --git a/chython/containers/query.py b/chython/containers/query.py index 757925f2..391bd452 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2024 Ramil Nugmanov +# Copyright 2018-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -20,13 +20,22 @@ from .bonds import Bond, QueryBond from .graph import Graph from ..algorithms.isomorphism import QueryIsomorphism -from ..algorithms.smiles import QuerySmiles from ..periodictable import Element, QueryElement from ..periodictable.base import Query -class QueryContainer(Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): - __slots__ = () +class QueryContainer(Graph[Query, QueryBond], QueryIsomorphism): + __slots__ = ('_smarts',) + + def __init__(self, smarts: str): + super().__init__() + self._smarts = smarts + + def __str__(self): + return self._smarts + + def __repr__(self): + return f'smarts({self._smarts})' def add_atom(self, atom: Union[Query, Element, int, str], *args, **kwargs): if not isinstance(atom, Query): diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index ca13c5e5..ab55da04 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -56,7 +56,7 @@ def __init__(self, reactants: Sequence[MoleculeContainer] = (), products: Sequen if not reactants and not products and not reagents: raise ValueError('At least one graph object required') elif not all(isinstance(x, MoleculeContainer) for x in chain(reactants, products, reagents)): - raise TypeError(f'MoleculeContainers expected') + raise TypeError('MoleculeContainers expected') self._reactants = reactants self._products = products diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 42adb6d8..40c3cd58 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -26,8 +26,6 @@ cx_radicals = compile(r'\^[1-7]:[0-9]+(?:,[0-9]+)*') -cx_hh = compile(r'atomProp(:[0-9]+\.(?:hyb|het|msk)\.[0-9]+)+') -hybridization = {'4': 4, '3': 1, '2': 2, '1': 3} # AD-HOC for masked atoms in SMARTS # not thread safe @@ -38,28 +36,23 @@ def smarts(data: str): """ Parse SMARTS string. - * stereo ignored. * only D, a, h, r and !R atom primitives supported. - * bond order list and not bond supported. + * bond order list (max 2) and not bond supported. * [not]ring bond supported only in combination with explicit bonds, not bonds and bonds orders lists. * mapping, charge and isotopes supported. * list of elements supported. * A - treats as any element. primitive (aliphatic) ignored. - * M - treats as any metal.. + * M - treats as any metal * <&> logic operator unsupported. * <;> logic operator is mandatory except (however preferable) for charge, isotope, stereo marks. * CXSMARTS radicals supported. - * hybridization and heteroatoms count in CXSMARTS atomProp notation coded as and keys. * masked atom - `chython.Reactor` specific mark for masking reactant atoms from deletion. - Coded in CXSMARTS atomProp as key with any value. 
For example:: - [C;r5,r6;a]-;!@[C;h1,h2] |^1:1,atomProp:1.hyb.24:1.het.0| - aromatic C member of 5 or 6 atoms ring + [C;r5,r6;a]-;!@[C;h1,h2;z2,z4] |^1:1| - aromatic C member of 5 or 6 atoms ring connected with non-ring single bond to aromatic or SP2 radical C with 1 or 2 hydrogens. - Alternative hybridization, heteroatoms and masks coding: - * primitive - heteroatoms (e.g. x2 - two heteroatoms) * primitive - hybridization (N = 1 - sp3, 2 - sp2, 3 - sp, 4 - aromatic) * primitive - masked atom @@ -72,45 +65,22 @@ def smarts(data: str): raise TypeError('Must be a SMARTS string') smr, *cx = data.split() - hyb = {} - het = {} - msk = [] - if cx and cx[0].startswith('|') and cx[0].endswith('|'): - radicals = [int(x) for x in findall(cx_radicals, cx[0]) for x in x[3:].split(',')] - - if hh := search(cx_hh, cx[0]): - for x in hh.group().split(':')[1:]: - i, h, v = x.split('.') - i = int(i) - if h == 'hyb': - hyb[i] = [hybridization[x] for x in v] - elif h == 'het': - het[i] = [int(y) for y in v] - else: - msk.append(i) - else: - radicals = [] + parsed = parser(smarts_tokenize(smr), False) - data = parser(smarts_tokenize(smr), False) - - for x in radicals: - data['atoms'][x]['is_radical'] = True - for i, v in hyb.items(): - data['atoms'][i]['hybridization'] = v - for i, v in het.items(): - data['atoms'][i]['heteroatoms'] = v - for i in msk: - data['atoms'][i]['masked'] = True + if cx and cx[0].startswith('|') and cx[0].endswith('|'): + for x in findall(cx_radicals, cx[0]): + for i in x[3:].split(','): + parsed['atoms'][int(i)]['is_radical'] = True - for i, s in data['stereo_atoms'].items(): - data['atoms'][i]['stereo'] = s - stereo_bonds = data['stereo_bonds'] + for i, s in parsed['stereo_atoms'].items(): + parsed['atoms'][i]['stereo'] = s + stereo_bonds = parsed['stereo_bonds'] - g = QueryContainer() + g = QueryContainer(data) mapping = {} - free = count(max(a.get('parsed_mapping', 0) for a in data['atoms']) + 1) - for i, a in enumerate(data['atoms']): + free = 
count(max(a.get('parsed_mapping', 0) for a in parsed['atoms']) + 1) + for i, a in enumerate(parsed['atoms']): mapping[i] = n = a.pop('parsed_mapping', 0) or next(global_free_masked if a.get('masked') else free) e = a.pop('element') if isinstance(e, int): @@ -121,7 +91,7 @@ def smarts(data: str): e = partial(ListElement, e) g.add_atom(e(**a), n) - for n, m, b in data['bonds']: + for n, m, b in parsed['bonds']: if n in stereo_bonds and m in stereo_bonds: if m not in stereo_bonds[n]: # only simple cis-trans supported, not cumulenes _, s1 = stereo_bonds[n].popitem() diff --git a/chython/utils/__init__.py b/chython/utils/__init__.py index ff7f58a9..301256ae 100644 --- a/chython/utils/__init__.py +++ b/chython/utils/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,7 +18,6 @@ # from importlib.util import find_spec from .free_wilson import * -from .functional_groups import * from .grid import * from .retro import * from .svg import * @@ -49,7 +48,7 @@ def w(obj): printing.is_sequence = w -__all__ = ['functional_groups', 'fw_prepare_groups', 'fw_decomposition_tree', +__all__ = ['fw_prepare_groups', 'fw_decomposition_tree', 'grid_depict', 'GridDepict', 'retro_depict', 'RetroDepict', 'svg2png', 'patch_pandas'] diff --git a/chython/utils/functional_groups.py b/chython/utils/functional_groups.py deleted file mode 100644 index e1b7105f..00000000 --- a/chython/utils/functional_groups.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020, 2021 Ramil Nugmanov -# Copyright 2020 Dinar Batyrshin -# This file is part of chython. 
-# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# - - -def functional_groups(molecule, limit): - """ - Generate all connected atom groups up to limit atoms. - - :param molecule: MoleculeContainer - :param limit: chain length - :return: list of molecule functional groups - """ - bonds = molecule._bonds - - if limit < 1: - raise ValueError('limit should be >= 1') - - response = [] - groups = set() - stack = [([a], list(n)) for a, n in bonds.items()] - while stack: - aug, nei = stack.pop(0) - for x in nei: - augx = (*aug, x) - if augx not in groups: - groups.add(augx) - response.append(molecule.substructure(augx, as_query=True)) - nt = nei.copy() - nt.remove(x) - nt.extend(list(bonds[x])) - if len(augx) < limit: - stack.append((augx, nt)) - return response - - -__all__ = ['functional_groups'] From f5e561301a308906d749e138ffb4c9c5daadcf84 Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 29 Jan 2025 20:38:49 +0100 Subject: [PATCH 58/68] tests merged --- chython/algorithms/aromatics/test/__init__.py | 18 ++ .../algorithms/aromatics/test}/test_kekule.py | 0 .../algorithms/aromatics/test}/test_thiele.py | 0 chython/algorithms/calculate2d/molecule.py | 2 +- .../algorithms/fingerprints/test/__init__.py | 18 ++ .../fingerprints/test}/test_linear.py | 0 .../fingerprints/test}/test_morgan.py | 0 chython/algorithms/mapping/test/__init__.py | 18 ++ .../algorithms/mapping/test}/test_mapping.py | 0 
.../algorithms/standardize/test}/__init__.py | 0 .../standardize/test}/test_groups.py | 0 .../algorithms/tautomers/test}/__init__.py | 0 .../tautomers/test}/test_tautomers.py | 0 chython/algorithms/test/__init__.py | 18 ++ .../algorithms/test}/test_smiles.py | 0 chython/files/daylight/test/__init__.py | 18 ++ .../daylight/test}/test_daylight_smarts.py | 0 .../daylight/test}/test_daylight_smiles.py | 0 .../files/daylight/test}/test_parser.py | 0 .../files/daylight/test}/test_tokenize.py | 0 chython/reactor/test/__init__.py | 18 ++ .../reactor/test}/test_deprotection.py | 0 .../reactor/test}/test_scaffold.py | 0 {tests/data => test}/MR.rdf | 0 {tests/data => test}/arenes.sdf | 0 {tests/data => test}/ch.xyz | 0 {tests/data => test}/cycle.sdf | 0 {tests/data => test}/depict.sdf | 0 {tests/data => test}/hbonds.sdf | 0 {tests/data => test}/heterocycles_charges.smi | 0 {tests/data => test}/implicit.mrv | 0 {tests/data => test}/implicit.sdf | 0 {tests/data => test}/ions.rdf | 0 {tests/data => test}/isomorphism.sdf | 0 {tests/data => test}/mcs.sdf | 0 {tests/data => test}/morgan_ruiner.sdf | 0 {tests/data => test}/peptide.sdf | 0 {tests/data => test}/reaction_centerslist.rdf | 0 {tests/data => test}/standardize.rdf | 0 {tests/data => test}/standardize.sdf | 0 {tests/data => test}/stereo.sdf | 0 tests/containers/test_query.py | 174 ------------------ 42 files changed, 109 insertions(+), 175 deletions(-) create mode 100644 chython/algorithms/aromatics/test/__init__.py rename {tests/algorithms/aromatics => chython/algorithms/aromatics/test}/test_kekule.py (100%) rename {tests/algorithms/aromatics => chython/algorithms/aromatics/test}/test_thiele.py (100%) create mode 100644 chython/algorithms/fingerprints/test/__init__.py rename {tests/algorithms/fingerprints => chython/algorithms/fingerprints/test}/test_linear.py (100%) rename {tests/algorithms/fingerprints => chython/algorithms/fingerprints/test}/test_morgan.py (100%) create mode 100644 
chython/algorithms/mapping/test/__init__.py rename {tests/algorithms/mapping => chython/algorithms/mapping/test}/test_mapping.py (100%) rename {tests/algorithms/standardize => chython/algorithms/standardize/test}/__init__.py (100%) rename {tests/algorithms/standardize => chython/algorithms/standardize/test}/test_groups.py (100%) rename {tests/algorithms/tautomers => chython/algorithms/tautomers/test}/__init__.py (100%) rename {tests/algorithms/tautomers => chython/algorithms/tautomers/test}/test_tautomers.py (100%) create mode 100644 chython/algorithms/test/__init__.py rename {tests/algorithms => chython/algorithms/test}/test_smiles.py (100%) create mode 100644 chython/files/daylight/test/__init__.py rename {tests/files/daylight => chython/files/daylight/test}/test_daylight_smarts.py (100%) rename {tests/files/daylight => chython/files/daylight/test}/test_daylight_smiles.py (100%) rename {tests/files/daylight => chython/files/daylight/test}/test_parser.py (100%) rename {tests/files/daylight => chython/files/daylight/test}/test_tokenize.py (100%) create mode 100644 chython/reactor/test/__init__.py rename {tests/reactor => chython/reactor/test}/test_deprotection.py (100%) rename {tests/reactor => chython/reactor/test}/test_scaffold.py (100%) rename {tests/data => test}/MR.rdf (100%) rename {tests/data => test}/arenes.sdf (100%) rename {tests/data => test}/ch.xyz (100%) rename {tests/data => test}/cycle.sdf (100%) rename {tests/data => test}/depict.sdf (100%) rename {tests/data => test}/hbonds.sdf (100%) rename {tests/data => test}/heterocycles_charges.smi (100%) rename {tests/data => test}/implicit.mrv (100%) rename {tests/data => test}/implicit.sdf (100%) rename {tests/data => test}/ions.rdf (100%) rename {tests/data => test}/isomorphism.sdf (100%) rename {tests/data => test}/mcs.sdf (100%) rename {tests/data => test}/morgan_ruiner.sdf (100%) rename {tests/data => test}/peptide.sdf (100%) rename {tests/data => test}/reaction_centerslist.rdf (100%) rename {tests/data 
=> test}/standardize.rdf (100%) rename {tests/data => test}/standardize.sdf (100%) rename {tests/data => test}/stereo.sdf (100%) delete mode 100644 tests/containers/test_query.py diff --git a/chython/algorithms/aromatics/test/__init__.py b/chython/algorithms/aromatics/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/algorithms/aromatics/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# diff --git a/tests/algorithms/aromatics/test_kekule.py b/chython/algorithms/aromatics/test/test_kekule.py similarity index 100% rename from tests/algorithms/aromatics/test_kekule.py rename to chython/algorithms/aromatics/test/test_kekule.py diff --git a/tests/algorithms/aromatics/test_thiele.py b/chython/algorithms/aromatics/test/test_thiele.py similarity index 100% rename from tests/algorithms/aromatics/test_thiele.py rename to chython/algorithms/aromatics/test/test_thiele.py diff --git a/chython/algorithms/calculate2d/molecule.py b/chython/algorithms/calculate2d/molecule.py index b2500727..cca6a137 100644 --- a/chython/algorithms/calculate2d/molecule.py +++ b/chython/algorithms/calculate2d/molecule.py @@ -34,7 +34,7 @@ from chython import MoleculeContainer try: - from py_mini_racer.py_mini_racer import MiniRacer, JSEvalException + from py_mini_racer import MiniRacer, JSEvalException ctx = MiniRacer() ctx.eval('const self = this') diff --git a/chython/algorithms/fingerprints/test/__init__.py b/chython/algorithms/fingerprints/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/algorithms/fingerprints/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# diff --git a/tests/algorithms/fingerprints/test_linear.py b/chython/algorithms/fingerprints/test/test_linear.py similarity index 100% rename from tests/algorithms/fingerprints/test_linear.py rename to chython/algorithms/fingerprints/test/test_linear.py diff --git a/tests/algorithms/fingerprints/test_morgan.py b/chython/algorithms/fingerprints/test/test_morgan.py similarity index 100% rename from tests/algorithms/fingerprints/test_morgan.py rename to chython/algorithms/fingerprints/test/test_morgan.py diff --git a/chython/algorithms/mapping/test/__init__.py b/chython/algorithms/mapping/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/algorithms/mapping/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# diff --git a/tests/algorithms/mapping/test_mapping.py b/chython/algorithms/mapping/test/test_mapping.py similarity index 100% rename from tests/algorithms/mapping/test_mapping.py rename to chython/algorithms/mapping/test/test_mapping.py diff --git a/tests/algorithms/standardize/__init__.py b/chython/algorithms/standardize/test/__init__.py similarity index 100% rename from tests/algorithms/standardize/__init__.py rename to chython/algorithms/standardize/test/__init__.py diff --git a/tests/algorithms/standardize/test_groups.py b/chython/algorithms/standardize/test/test_groups.py similarity index 100% rename from tests/algorithms/standardize/test_groups.py rename to chython/algorithms/standardize/test/test_groups.py diff --git a/tests/algorithms/tautomers/__init__.py b/chython/algorithms/tautomers/test/__init__.py similarity index 100% rename from tests/algorithms/tautomers/__init__.py rename to chython/algorithms/tautomers/test/__init__.py diff --git a/tests/algorithms/tautomers/test_tautomers.py b/chython/algorithms/tautomers/test/test_tautomers.py similarity index 100% rename from tests/algorithms/tautomers/test_tautomers.py rename to chython/algorithms/tautomers/test/test_tautomers.py diff --git a/chython/algorithms/test/__init__.py b/chython/algorithms/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/algorithms/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# diff --git a/tests/algorithms/test_smiles.py b/chython/algorithms/test/test_smiles.py similarity index 100% rename from tests/algorithms/test_smiles.py rename to chython/algorithms/test/test_smiles.py diff --git a/chython/files/daylight/test/__init__.py b/chython/files/daylight/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/files/daylight/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# diff --git a/tests/files/daylight/test_daylight_smarts.py b/chython/files/daylight/test/test_daylight_smarts.py similarity index 100% rename from tests/files/daylight/test_daylight_smarts.py rename to chython/files/daylight/test/test_daylight_smarts.py diff --git a/tests/files/daylight/test_daylight_smiles.py b/chython/files/daylight/test/test_daylight_smiles.py similarity index 100% rename from tests/files/daylight/test_daylight_smiles.py rename to chython/files/daylight/test/test_daylight_smiles.py diff --git a/tests/files/daylight/test_parser.py b/chython/files/daylight/test/test_parser.py similarity index 100% rename from tests/files/daylight/test_parser.py rename to chython/files/daylight/test/test_parser.py diff --git a/tests/files/daylight/test_tokenize.py b/chython/files/daylight/test/test_tokenize.py similarity index 100% rename from tests/files/daylight/test_tokenize.py rename to chython/files/daylight/test/test_tokenize.py diff --git a/chython/reactor/test/__init__.py b/chython/reactor/test/__init__.py new file mode 100644 index 00000000..c8a5a613 --- /dev/null +++ b/chython/reactor/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# diff --git a/tests/reactor/test_deprotection.py b/chython/reactor/test/test_deprotection.py similarity index 100% rename from tests/reactor/test_deprotection.py rename to chython/reactor/test/test_deprotection.py diff --git a/tests/reactor/test_scaffold.py b/chython/reactor/test/test_scaffold.py similarity index 100% rename from tests/reactor/test_scaffold.py rename to chython/reactor/test/test_scaffold.py diff --git a/tests/data/MR.rdf b/test/MR.rdf similarity index 100% rename from tests/data/MR.rdf rename to test/MR.rdf diff --git a/tests/data/arenes.sdf b/test/arenes.sdf similarity index 100% rename from tests/data/arenes.sdf rename to test/arenes.sdf diff --git a/tests/data/ch.xyz b/test/ch.xyz similarity index 100% rename from tests/data/ch.xyz rename to test/ch.xyz diff --git a/tests/data/cycle.sdf b/test/cycle.sdf similarity index 100% rename from tests/data/cycle.sdf rename to test/cycle.sdf diff --git a/tests/data/depict.sdf b/test/depict.sdf similarity index 100% rename from tests/data/depict.sdf rename to test/depict.sdf diff --git a/tests/data/hbonds.sdf b/test/hbonds.sdf similarity index 100% rename from tests/data/hbonds.sdf rename to test/hbonds.sdf diff --git a/tests/data/heterocycles_charges.smi b/test/heterocycles_charges.smi similarity index 100% rename from tests/data/heterocycles_charges.smi rename to test/heterocycles_charges.smi diff --git a/tests/data/implicit.mrv b/test/implicit.mrv similarity index 100% rename from tests/data/implicit.mrv rename to test/implicit.mrv diff --git a/tests/data/implicit.sdf b/test/implicit.sdf similarity index 100% rename from tests/data/implicit.sdf rename to test/implicit.sdf diff --git a/tests/data/ions.rdf b/test/ions.rdf similarity index 100% rename from tests/data/ions.rdf rename to test/ions.rdf diff --git a/tests/data/isomorphism.sdf b/test/isomorphism.sdf similarity index 100% rename from tests/data/isomorphism.sdf rename to test/isomorphism.sdf diff --git a/tests/data/mcs.sdf b/test/mcs.sdf 
similarity index 100% rename from tests/data/mcs.sdf rename to test/mcs.sdf diff --git a/tests/data/morgan_ruiner.sdf b/test/morgan_ruiner.sdf similarity index 100% rename from tests/data/morgan_ruiner.sdf rename to test/morgan_ruiner.sdf diff --git a/tests/data/peptide.sdf b/test/peptide.sdf similarity index 100% rename from tests/data/peptide.sdf rename to test/peptide.sdf diff --git a/tests/data/reaction_centerslist.rdf b/test/reaction_centerslist.rdf similarity index 100% rename from tests/data/reaction_centerslist.rdf rename to test/reaction_centerslist.rdf diff --git a/tests/data/standardize.rdf b/test/standardize.rdf similarity index 100% rename from tests/data/standardize.rdf rename to test/standardize.rdf diff --git a/tests/data/standardize.sdf b/test/standardize.sdf similarity index 100% rename from tests/data/standardize.sdf rename to test/standardize.sdf diff --git a/tests/data/stereo.sdf b/test/stereo.sdf similarity index 100% rename from tests/data/stereo.sdf rename to test/stereo.sdf diff --git a/tests/containers/test_query.py b/tests/containers/test_query.py deleted file mode 100644 index 78d8e3f7..00000000 --- a/tests/containers/test_query.py +++ /dev/null @@ -1,174 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -import pytest -from chython.containers.query import QueryContainer -from chython.containers.bonds import QueryBond -from chython.periodictable import QueryElement - - -def test_query_container_basic(): - # Test basic container creation and atom addition - qc = QueryContainer() - - # Add atoms with different input types - n1 = qc.add_atom('C') # from symbol - assert isinstance(qc._atoms[n1], QueryElement) - - n2 = qc.add_atom(7) # from atomic number (N) - assert isinstance(qc._atoms[n2], QueryElement) - - n3 = qc.add_atom(QueryElement.from_symbol('O')()) # from QueryElement - assert isinstance(qc._atoms[n3], QueryElement) - - -def test_query_container_neighbors(): - # Test neighbors validation and storage - qc = QueryContainer() - - # Test valid neighbors - n1 = qc.add_atom('C', neighbors=2) # single value - assert qc._neighbors[n1] == (2,) - - n2 = qc.add_atom('C', neighbors=[1, 2, 3]) # list of values - assert qc._neighbors[n2] == (1, 2, 3) - - # Test invalid neighbors - with pytest.raises(ValueError): - qc.add_atom('C', neighbors=-1) # negative value - - with pytest.raises(ValueError): - qc.add_atom('C', neighbors=15) # value too large - - with pytest.raises(ValueError): - qc.add_atom('C', neighbors=[1, 1]) # duplicate values - - -def test_query_container_hybridization(): - # Test hybridization validation and storage - qc = QueryContainer() - - # Test valid hybridization - n1 = qc.add_atom('C', hybridization=1) # sp3 - assert qc._hybridizations[n1] == (1,) - - n2 = qc.add_atom('C', hybridization=[1, 2]) # sp3 and sp2 - assert qc._hybridizations[n2] == (1, 2) - - # Test invalid hybridization - with pytest.raises(ValueError): - qc.add_atom('C', hybridization=0) # invalid value - - with pytest.raises(ValueError): - qc.add_atom('C', hybridization=5) # invalid value - - -def test_query_container_rings(): - # Test ring size validation and storage - qc = QueryContainer() - - # Test valid ring sizes - n1 = qc.add_atom('C', rings_sizes=3) # 3-membered ring - assert 
qc._rings_sizes[n1] == (3,) - - n2 = qc.add_atom('C', rings_sizes=[5, 6]) # 5 and 6-membered rings - assert qc._rings_sizes[n2] == (5, 6) - - # Test invalid ring sizes - with pytest.raises(ValueError): - qc.add_atom('C', rings_sizes=2) # too small - - with pytest.raises(ValueError): - qc.add_atom('C', rings_sizes=[5, 5]) # duplicate values - - -def test_query_container_bonds(): - # Test bond addition and validation - qc = QueryContainer() - n1 = qc.add_atom('C') - n2 = qc.add_atom('C') - - # Add bond with different input types - qc.add_bond(n1, n2, 1) # from int (single bond) - assert isinstance(qc._bonds[n1][n2], QueryBond) - - qc = QueryContainer() - n1 = qc.add_atom('C') - n2 = qc.add_atom('C') - qc.add_bond(n1, n2, (1, 2)) # from tuple (single or double bond) - assert isinstance(qc._bonds[n1][n2], QueryBond) - - -def test_query_container_copy(): - # Test container copying - qc = QueryContainer() - n1 = qc.add_atom('C', neighbors=2, hybridization=1) - n2 = qc.add_atom('N', rings_sizes=6) - qc.add_bond(n1, n2, 1) - - # Make a copy - copy = qc.copy() - - # Verify all attributes are copied - assert copy._neighbors == qc._neighbors - assert copy._hybridizations == qc._hybridizations - assert copy._rings_sizes == qc._rings_sizes - assert len(copy._bonds) == len(qc._bonds) - - -def test_query_container_union(): - # Test container union - qc1 = QueryContainer() - n1 = qc1.add_atom('C', neighbors=2) - n2 = qc1.add_atom('O') - qc1.add_bond(n1, n2, 1) - - qc2 = QueryContainer() - n3 = qc2.add_atom('N', rings_sizes=5) - n4 = qc2.add_atom('C') - qc2.add_bond(n3, n4, 2) - - # Create union with remapping to avoid collisions - union = qc1.union(qc2, remap=True) - - # Verify union properties - assert len(union._atoms) == 4 # total number of atoms - assert len(union._bonds) == 4 # each bond is stored twice (bidirectional) - assert sum(len(bonds) for bonds in union._bonds.values()) == 4 # total number of bond entries - assert len(union._neighbors) == len(qc1._neighbors) + 
len(qc2._neighbors) - assert len(union._rings_sizes) == len(qc1._rings_sizes) + len(qc2._rings_sizes) - - -def test_query_container_enumerate(): - # Test query enumeration - qc = QueryContainer() - n1 = qc.add_atom('C') - # Add N and O separately to test enumeration - n2 = qc.add_atom('N') - n3 = qc.add_atom('O') - qc.add_bond(n1, n2, (1, 2)) # single or double bond - qc.add_bond(n1, n3, 1) # single bond - - # Enumerate all possible combinations - queries = list(qc.enumerate_queries()) - assert len(queries) >= 2 # at least 2 combinations due to bond types - - # Test with mark enumeration - queries = list(qc.enumerate_queries(enumerate_marks=True)) - assert len(queries) >= 2 # should include mark combinations \ No newline at end of file From 95f27d85148e206aa228212b627a2fb80ec2ea78 Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 29 Jan 2025 21:36:01 +0100 Subject: [PATCH 59/68] aromaticity test refactored. still not full coverage of conditions. --- .../algorithms/aromatics/test/test_kekule.py | 132 +++++++------- .../algorithms/aromatics/test/test_thiele.py | 163 ++++++------------ 2 files changed, 124 insertions(+), 171 deletions(-) diff --git a/chython/algorithms/aromatics/test/test_kekule.py b/chython/algorithms/aromatics/test/test_kekule.py index 7465b819..86ceae59 100644 --- a/chython/algorithms/aromatics/test/test_kekule.py +++ b/chython/algorithms/aromatics/test/test_kekule.py @@ -26,57 +26,72 @@ def test_kekule_basic(): # Test basic aromatic ring conversion mol = smiles('c1ccccc1') # benzene assert mol.kekule() # should return True for aromatic rings - - # Verify alternating single and double bonds - bonds = mol._bonds - double_bonds = sum(1 for n, ms in bonds.items() for m, b in ms.items() if b.order == 2 and m > n) - assert double_bonds == 3 # benzene should have 3 double bonds + assert mol == smiles('C1=CC=CC=C1') def test_kekule_pyridine(): # Test pyridine and its derivatives mol = smiles('n1ccccc1') # pyridine assert mol.kekule() - + assert mol == 
smiles('N1=CC=CC=C1') + assert mol.atom(1).implicit_hydrogens == 0 + # Test protonated pyridine - mol_protonated = smiles('[nH+]1ccccc1') - assert mol_protonated.kekule() + mol = smiles('[nH+]1ccccc1') + assert mol.kekule() + assert mol == smiles('[NH+]1=CC=CC=C1') def test_kekule_pyrrole(): # Test pyrrole and its derivatives mol = smiles('[nH]1cccc1') # pyrrole assert mol.kekule() - + assert mol == smiles('N1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 1 + + mol = smiles('n1cccc1') + assert mol.kekule() + assert mol == smiles('N1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 1 + # Test N-methylpyrrole - mol_methyl = smiles('Cn1cccc1') - assert mol_methyl.kekule() + mol = smiles('Cn1cccc1') + assert mol.kekule() + assert mol == smiles('CN1C=CC=C1') + assert mol.atom(2).implicit_hydrogens == 0 def test_kekule_furan_thiophene(): # Test oxygen and sulfur containing aromatics - mol_furan = smiles('o1cccc1') - assert mol_furan.kekule() - - mol_thiophene = smiles('s1cccc1') - assert mol_thiophene.kekule() + mol = smiles('o1cccc1') + assert mol.kekule() + assert mol == smiles('O1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 0 + + mol = smiles('s1cccc1') + assert mol.kekule() + assert mol == smiles('S1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 0 def test_kekule_complex_systems(): # Test fused ring systems - mol_naphthalene = smiles('c1ccc2ccccc2c1') - assert mol_naphthalene.kekule() - + mol = smiles('c1ccc2ccccc2c1') + assert mol.kekule() + assert mol == smiles('C1=CC2=CC=CC=C2C=C1') or mol == smiles('C1=CC2=C(C=C1)C=CC=C2') + # Test indole - mol_indole = smiles('c1ccc2[nH]ccc2c1') - assert mol_indole.kekule() + mol = smiles('c1ccc2[nH]ccc2c1') + assert mol.kekule() + assert mol == smiles('N1C=CC2=C1C=CC=C2') or mol == smiles('N1C=CC2=CC=CC=C12') def test_kekule_enumeration(): - # Test enumeration of Kekulé structures - mol = smiles('c1ccccc1') # benzene + mol = smiles('Cc1ccccc1C') forms = list(mol.enumerate_kekule()) - assert len(forms) == 
2 # benzene has 2 Kekulé forms + assert len(forms) == 2 # benzene has 2 Kekule forms + assert smiles('CC1=C(C)C=CC=C1') in forms + assert smiles('CC1=CC=CC=C1C') in forms def test_kekule_invalid_structures(): @@ -84,11 +99,11 @@ def test_kekule_invalid_structures(): with pytest.raises(InvalidAromaticRing): mol = smiles('c1cccc1') # 5-membered carbon ring (invalid aromatic) mol.kekule() - + with pytest.raises(InvalidAromaticRing): mol = smiles('c1ccc2c1c3ccccc3cc2') # acenaphthalene (invalid aromatic form) mol.kekule() - + with pytest.raises(InvalidAromaticRing): mol = smiles('c1cccc1C(=O)c1cccc1') # cyclopentadiene with carbonyl (invalid aromatic) mol.kekule() @@ -96,59 +111,50 @@ def test_kekule_invalid_structures(): def test_kekule_charged_species(): # Test charged aromatic species - mol_pyridinium = smiles('[n+]1ccccc1') - assert mol_pyridinium.kekule() - - mol_cyclopentadienyl = smiles('[cH-]1cccc1') - assert mol_cyclopentadienyl.kekule() + mol = smiles('[n+]1ccccc1') + assert mol.kekule() + assert mol == smiles('C=1[NH+]=CC=CC=1') + + mol = smiles('[cH-]1cccc1') + assert mol.kekule() + assert mol == smiles('C=1C=C[CH-]C=1') def test_kekule_multiple_rings(): # Test molecules with multiple aromatic rings - mol_biphenyl = smiles('c1ccccc1-c2ccccc2') - assert mol_biphenyl.kekule() - - # Test phenylpyridine - mol_phenylpyridine = smiles('c1ccccc1-c2ccccn2') - assert mol_phenylpyridine.kekule() + mol = smiles('c1ccccc1c2ccccc2') + assert mol.kekule() + assert mol == smiles('C1=CC=C(C=C1)C1=CC=CC=C1') def test_kekule_heteroatoms(): # Test various heteroatoms in aromatic rings - mol_pyrazine = smiles('n1ccncc1') # two nitrogens - assert mol_pyrazine.kekule() - - mol_oxazole = smiles('o1cncc1') # oxygen and nitrogen - assert mol_oxazole.kekule() - - mol_thiazole = smiles('s1cncc1') # sulfur and nitrogen - assert mol_thiazole.kekule() + mol = smiles('c1cncn1') # two nitrogens + assert mol.kekule() + assert mol == smiles('N1C=CN=C1') + + mol = smiles('o1cncc1') # oxygen 
and nitrogen + assert mol.kekule() + assert mol == smiles('C1=COC=N1') def test_kekule_buffer_size(): # Test buffer size parameter for complex heterocycles - mol1 = smiles('c1ccc2[nH]ccc2c1') # indole - assert mol1.kekule(buffer_size=1) # small buffer - - mol2 = smiles('c1ccc2[nH]ccc2c1') # fresh indole instance - assert mol2.kekule(buffer_size=10) # large buffer + mol = smiles('c1ccc2[nH]ccc2c1') # indole + assert mol.kekule(buffer_size=1) # small buffer + + mol = smiles('c1ccc2[nH]ccc2c1') # fresh indole instance + assert mol.kekule(buffer_size=10) # large buffer def test_kekule_radical_species(): - # Test radical aromatic species - mol_phenoxy = smiles('[O]c1ccccc1') - assert mol_phenoxy.kekule() - - # Test radical cation - mol_benzene_radical = smiles('[c]1ccccc1') - assert mol_benzene_radical.kekule() + mol = smiles('[c]1ccccc1') + assert mol.kekule() + assert mol == smiles('C=1C=CC=[C]C=1 |^1:4|') def test_kekule_quinones(): # Test quinone-like structures - mol_benzoquinone = smiles('O=C1C=CC(=O)C=C1') - assert not mol_benzoquinone.kekule() # not aromatic - - # Test semiquinone - mol_semiquinone = smiles('O=C1C=CC(O)C=C1') - assert not mol_semiquinone.kekule() # not aromatic \ No newline at end of file + mol = smiles('O=c1ccc(=O)cc1') + assert mol.kekule() + assert mol == smiles('C1=CC(C=CC1=O)=O') diff --git a/chython/algorithms/aromatics/test/test_thiele.py b/chython/algorithms/aromatics/test/test_thiele.py index 5eb19ba5..a8b7d087 100644 --- a/chython/algorithms/aromatics/test/test_thiele.py +++ b/chython/algorithms/aromatics/test/test_thiele.py @@ -24,7 +24,7 @@ def test_basic_thiele(): # Test basic aromatic systems mol = smiles('C1=CC=CC=C1') # benzene in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic (order 4) for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -34,7 +34,7 @@ def test_pyrrole_thiele(): # Test pyrrole-like systems mol = smiles('N1C=CC=C1') # pyrrole in Kekule form assert mol.thiele() - + # Check 
that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -44,7 +44,7 @@ def test_furan_thiele(): # Test furan-like systems mol = smiles('O1C=CC=C1') # furan in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -54,7 +54,7 @@ def test_thiophene_thiele(): # Test thiophene-like systems mol = smiles('S1C=CC=C1') # thiophene in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -64,7 +64,7 @@ def test_condensed_rings(): # Test condensed ring systems mol = smiles('C1=CC=C2C=CC=CC2=C1') # naphthalene in Kekule form assert mol.thiele() - + # Check that all bonds in both rings are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -75,37 +75,34 @@ def test_tautomer_fix(): mol = smiles('N1C=CC2=NC=CC2=C1') # before fix assert mol.thiele(fix_tautomers=True) - # After tautomer fixing, one N should have 0 H and the other should have 1 H - n_with_h = 0 - n_without_h = 0 - for n, atom in mol.atoms(): - if atom.atomic_number == 7: # nitrogen - assert len(mol._bonds[n]) == 2 # should have 2 bonds - if mol._hydrogens[n] == 1: - n_with_h += 1 - elif mol._hydrogens[n] == 0: - n_without_h += 1 - - assert n_with_h == 1 # one nitrogen should have 1 hydrogen - assert n_without_h == 1 # one nitrogen should have 0 hydrogens + for n, m, bond in mol.bonds(): + assert bond.order == 4 + + assert mol.atom(1).implicit_hydrogens == 0 + assert mol.atom(5).implicit_hydrogens == 1 + + mol = smiles('N1C=CC2=NC=CC2=C1') # before fix + assert mol.thiele(fix_tautomers=False) + + for n, m, bond in mol.bonds(): + assert bond.order == 4 + + assert mol.atom(1).implicit_hydrogens == 1 + assert mol.atom(5).implicit_hydrogens == 0 def test_quinone_exclusion(): # Test that quinone-like structures are not aromatized mol = smiles('O=C1C=CC(=O)C=C1') # para-benzoquinone - 
initial_double_bonds = [(n, m) for n, m, bond in mol.bonds() if bond.order == 2] assert not mol.thiele() # should return False - - # Check that double bonds remain as double bonds - final_double_bonds = [(n, m) for n, m, bond in mol.bonds() if bond.order == 2] - assert len(final_double_bonds) == len(initial_double_bonds) + assert mol == smiles('O=C1C=CC(=O)C=C1') def test_invalid_rings(): # Test that invalid rings are not aromatized mol = smiles('C1=CC=C1') # cyclobutadiene - too small assert not mol.thiele() # should return False - + mol = smiles('C1=CC=CC=CC=CC=C1') # 10-membered ring - too large assert not mol.thiele() # should return False @@ -114,21 +111,20 @@ def test_charged_systems(): # Test charged aromatic systems mol = smiles('[NH+]1C=CC=CC=1') # pyridinium in Kekule form assert mol.thiele() - + # Check that charge is preserved and ring is aromatic - for n, atom in mol.atoms(): - if atom.atomic_number == 7: # nitrogen - assert mol._charges[n] == 1 # should have +1 charge - + assert mol.atom(1).charge == 1 + assert mol.atom(1).implicit_hydrogens == 1 + for n, m, bond in mol.bonds(): assert bond.order == 4 # all bonds should be aromatic def test_freak_rules(): # Test special cases handled by freak rules - mol = smiles('N1C=CN2C=CC=CC1=2') # special N-fused system in Kekule form + mol = smiles('N1C=CN2C=CC=C12') # special N-fused system in Kekule form assert mol.thiele() - + # Check that all bonds are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -137,29 +133,15 @@ def test_freak_rules(): def test_tetracyclic_systems(): # Test 4-membered ring in condensed systems mol = smiles('C1=CC2=C(C=CC=C2)C1') # benzocyclobutene - result = mol.thiele() - - # Only the 6-membered ring should be aromatic - aromatic_bonds = 0 - rings_by_size = {len(ring): ring for ring in mol.sssr} - - for n, m, bond in mol.bonds(): - if bond.order == 4: # aromatic bond - aromatic_bonds += 1 - # Check that this bond belongs to the 6-membered ring - if 6 in 
rings_by_size: - six_ring = rings_by_size[6] - assert n in six_ring and m in six_ring - - assert aromatic_bonds == 6 # six aromatic bonds in benzene ring - assert result # should return True since part of the molecule is aromatic + assert mol.thiele() + assert mol == smiles('C1C=Cc2ccccc12') def test_phosphorus_rings(): # Test phosphorus-containing aromatic rings mol = smiles('P1C=CC=CC=1') # phosphabenzene in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -169,7 +151,7 @@ def test_boron_rings(): # Test boron-containing aromatic rings mol = smiles('B1C=CC=C1') # borole in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -180,46 +162,43 @@ def test_seven_membered_rings(): # Only boron-containing 7-membered rings should be aromatic mol = smiles('B1C=CC=CC=C1') # 7-membered ring with boron assert mol.thiele() - + mol = smiles('N1C=CC=CC=C1') # 7-membered ring with nitrogen assert not mol.thiele() def test_ferrocene_like(): # Test negatively charged carbon systems (ferrocene-like) - mol = smiles('[C-]1C=CC=C1') # cyclopentadienyl anion + mol = smiles('[CH-]1C=CC=C1') # cyclopentadienyl anion assert mol.thiele() - - # Check charge preservation and aromaticity - charged_carbons = 0 - for n, atom in mol.atoms(): - if atom.atomic_number == 6 and mol._charges[n] == -1: - charged_carbons += 1 - assert charged_carbons == 1 - + for n, m, bond in mol.bonds(): assert bond.order == 4 + assert int(mol) == -1 + assert mol.atom(1).charge == -1 + def test_multiple_components(): # Test systems with multiple aromatic components mol = smiles('C1=CC=CC=C1.C1=CC=CC=C1') # two benzene molecules in Kekule form assert mol.thiele() - + # Check that all bonds in both components are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 + assert mol.connected_components_count == 2 def 
test_complex_fused_systems(): # Test complex fused ring systems with multiple heteroatoms - + # Benzothiazole (simplified) mol = smiles('C1=CC=C2SC=NC2=C1') # benzothiazole in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Thienopyridine (simplified) mol = smiles('C1=CC=NC2=CSC=C12') # thienopyridine in Kekule form assert mol.thiele() @@ -229,88 +208,56 @@ def test_complex_fused_systems(): def test_complex_charged_systems(): # Test charged aromatic systems - + # Basic charged systems that should work # Pyridinium mol = smiles('[NH+]1C=CC=CC=1') # pyridinium in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + assert mol.atom(1).charge == 1 + assert mol.atom(1).implicit_hydrogens == 1 + # Complex charged systems that are not yet supported # N-methylpyridinium (currently not aromatized properly) mol = smiles('C[N+]1=CC=CC=C1') # N-methylpyridinium in Kekule form assert mol.thiele() + assert mol.atom(2).charge == 1 def test_complex_heterocycles(): # Test heterocyclic systems - + # Basic heterocycles that should work # Benzimidazole mol = smiles('C1=CC=C2NC=NC2=C1') # in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Quinoxaline mol = smiles('C1=CC=C2N=CC=NC2=C1') # in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Benzimidazole fused to thiophene (works) mol = smiles('C1=CC2=C(C=C1)N=CN2C3=CC=CS3') # in Kekule form - result = mol.thiele() - assert result # use stored result instead of calling thiele() again - - # Check bonds in each ring separately - benzene_ring = {1, 2, 3, 4, 5, 6} # benzene part - imidazole_ring = {3, 4, 7, 8, 9} # imidazole part - thiophene_ring = {10, 11, 12, 13, 14} # thiophene part - + assert mol.thiele() + for n, m, bond in mol.bonds(): # Check if both atoms of the bond are in the same ring - if (n in benzene_ring and m in benzene_ring) or \ - (n in imidazole_ring and m 
in imidazole_ring) or \ - (n in thiophene_ring and m in thiophene_ring): - assert bond.order == 4, f"Bond {n}-{m} should be aromatic" + assert bond.order == 4 or (bond.order == 1 and n in (9, 10) and m in (9, 10)) def test_complex_bridged_systems(): - # Test bridged aromatic systems - - # Basic bridged systems that should work - # Naphthalene - mol = smiles('C1=CC=C2C=CC=CC2=C1') # in Kekule form - assert mol.thiele() - for n, m, bond in mol.bonds(): - assert bond.order == 4 - - # Quinoline - mol = smiles('C1=CC=C2C=CC=NC2=C1') # in Kekule form - assert mol.thiele() - for n, m, bond in mol.bonds(): - assert bond.order == 4 - # Complex bridged system with multiple heteroatoms (works) mol = smiles('C1=CC2=C(C=C1)N=C3C(=C2)C=CC4=C3N=CS4') # in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Bridged system with N and S (works) mol = smiles('C1=CC2=C(C=C1)SC3=C(N=CC=C3)C=C2') # in Kekule form - result = mol.thiele() - assert result # use stored result instead of calling thiele() again - - # Check bonds in each ring separately - benzene_ring = {1, 2, 3, 4, 5, 6} # benzene part - pyridine_ring = {8, 9, 10, 11, 12, 13} # pyridine part - - for n, m, bond in mol.bonds(): - # Check if both atoms of the bond are in the same ring - if (n in benzene_ring and m in benzene_ring) or \ - (n in pyridine_ring and m in pyridine_ring): - assert bond.order == 4, f"Bond {n}-{m} should be aromatic" - # Bonds between rings or to S can remain non-aromatic \ No newline at end of file + assert mol.thiele() + assert mol == smiles('S1c2ccccc2C=Cc2[n]cccc12') From feac7568be2844e3054344d1c549628ecc1f3490 Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 29 Jan 2025 22:02:35 +0100 Subject: [PATCH 60/68] linear fp tests refactored --- .github/workflows/tests.yml | 22 ++-- .../algorithms/aromatics/test/test_kekule.py | 8 +- .../fingerprints/test/test_linear.py | 110 +++++++----------- 3 files changed, 58 insertions(+), 82 deletions(-) diff --git 
a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 14ec8ebf..6d4a04c6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,9 +2,9 @@ name: Tests on: push: - branches: [ main, master, tests_update ] + branches: [ main, master ] pull_request: - branches: [ main, master, tests_update ] + branches: [ main, master ] jobs: test: @@ -52,25 +52,25 @@ jobs: steps: - uses: actions/checkout@v3 - + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: 'pip' # Cache pip dependencies - + - name: Install system dependencies (Linux) if: runner.os == 'Linux' run: | sudo apt-get update sudo apt-get install -y build-essential gcc g++ gfortran gcc --version # Print version for debugging - + - name: Install system dependencies (Windows) if: runner.os == 'Windows' run: | choco install visualstudio2019buildtools --package-parameters "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64" - + - name: Install Cython and core dependencies run: | python -m pip install --upgrade pip @@ -79,7 +79,7 @@ jobs: pip install ${{ matrix.extra_deps }} fi shell: bash - + - name: Install Poetry uses: snok/install-poetry@v1 with: @@ -115,17 +115,17 @@ jobs: env: PATH: ${{ env.PATH }};${{ env.APPDATA }}\Python\Scripts timeout-minutes: 15 - + - name: Clean Cython build shell: bash run: | rm -rf build/ rm -rf *.so find . -name "*.c" -delete || true - + - name: Force Cython recompilation run: poetry run pip install -e . 
--no-deps --force-reinstall - + - name: Install test dependencies run: | poetry run pip install pytest-html pytest-cov @@ -158,4 +158,4 @@ jobs: file: test-results/coverage.xml flags: unittests name: codecov-umbrella - fail_ci_if_error: false \ No newline at end of file + fail_ci_if_error: false \ No newline at end of file diff --git a/chython/algorithms/aromatics/test/test_kekule.py b/chython/algorithms/aromatics/test/test_kekule.py index 86ceae59..be33e701 100644 --- a/chython/algorithms/aromatics/test/test_kekule.py +++ b/chython/algorithms/aromatics/test/test_kekule.py @@ -18,8 +18,8 @@ # along with this program; if not, see . # from chython import smiles -import pytest from chython.exceptions import InvalidAromaticRing +from pytest import raises def test_kekule_basic(): @@ -96,15 +96,15 @@ def test_kekule_enumeration(): def test_kekule_invalid_structures(): # Test invalid aromatic structures - with pytest.raises(InvalidAromaticRing): + with raises(InvalidAromaticRing): mol = smiles('c1cccc1') # 5-membered carbon ring (invalid aromatic) mol.kekule() - with pytest.raises(InvalidAromaticRing): + with raises(InvalidAromaticRing): mol = smiles('c1ccc2c1c3ccccc3cc2') # acenaphthalene (invalid aromatic form) mol.kekule() - with pytest.raises(InvalidAromaticRing): + with raises(InvalidAromaticRing): mol = smiles('c1cccc1C(=O)c1cccc1') # cyclopentadiene with carbonyl (invalid aromatic) mol.kekule() diff --git a/chython/algorithms/fingerprints/test/test_linear.py b/chython/algorithms/fingerprints/test/test_linear.py index fa39950e..b4407435 100644 --- a/chython/algorithms/fingerprints/test/test_linear.py +++ b/chython/algorithms/fingerprints/test/test_linear.py @@ -17,110 +17,86 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from chython import smiles import numpy as np -import pytest +from chython import smiles def test_linear_fingerprint_basic(): # Test basic fingerprint generation mol = smiles('CCO') fp = mol.linear_fingerprint(min_radius=1, max_radius=2, length=1024) - + # Test array properties assert isinstance(fp, np.ndarray) assert fp.dtype == np.uint8 assert fp.shape == (1024,) - + # Test binary nature assert set(np.unique(fp)).issubset({0, 1}) + # Test different lengths + fp = mol.linear_fingerprint(length=2048) + assert isinstance(fp, np.ndarray) + assert fp.dtype == np.uint8 + assert fp.shape == (2048,) + def test_linear_fingerprint_consistency(): # Test that fingerprints are consistent for the same molecule - mol = smiles('CCO') - fp1 = mol.linear_fingerprint() - fp2 = mol.linear_fingerprint() - + fp1 = smiles('CCO').linear_fingerprint() + fp2 = smiles('OCC').linear_fingerprint() + # Test exact equality of arrays - np.testing.assert_array_equal(fp1, fp2) - - # Test different molecules give different fingerprints - mol2 = smiles('CCC') - fp3 = mol2.linear_fingerprint() + assert np.array_equal(fp1, fp2) + + # Test different molecules give different fingerprints + fp3 = smiles('CCC').linear_fingerprint() assert not np.array_equal(fp1, fp3) def test_linear_fingerprint_parameters(): mol = smiles('CCO') - + # Test different radius parameters fp1 = mol.linear_fingerprint(min_radius=1, max_radius=2) fp2 = mol.linear_fingerprint(min_radius=1, max_radius=3) - assert fp2.sum() >= fp1.sum() # More radius should capture more features - - # Test different lengths - fp3 = mol.linear_fingerprint(length=2048) - assert fp3.shape == (2048,) - assert isinstance(fp3, np.ndarray) - assert fp3.dtype == np.uint8 - + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) + # Test number of active bits + fp3 = mol.linear_fingerprint(number_active_bits=2) fp4 = mol.linear_fingerprint(number_active_bits=3) - assert fp4.sum() >= fp1.sum() # More active bits should set more bits
+ assert not np.array_equal(fp3, fp4) + assert np.array_equal(fp3 & fp4, fp3) def test_linear_fingerprint_bit_pairs(): # Test the number_bit_pairs parameter - mol = smiles('CCCC') # molecule with multiple similar fragments - + mol = smiles('CCCCCCCCCCCCCCCCCCCCCCCC') # molecule with multiple similar fragments + # Compare different number_bit_pairs settings - fp1 = mol.linear_fingerprint(number_bit_pairs=1) - fp2 = mol.linear_fingerprint(number_bit_pairs=2) - fp3 = mol.linear_fingerprint(number_bit_pairs=4) - - # More bit pairs should potentially activate more bits - assert fp1.sum() <= fp2.sum() <= fp3.sum() - - -def test_linear_fingerprint_complex_molecule(): - # Test with a more complex molecule - mol = smiles('c1ccccc1CC(=O)O') - fp = mol.linear_fingerprint() - - # Basic checks - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - - # Should have reasonable number of bits set - assert 0 < fp.sum() < len(fp) # some bits should be set, but not all - - # Test with different parameters - fp_large = mol.linear_fingerprint(max_radius=6, length=2048) - assert fp_large.shape == (2048,) - assert fp_large.sum() > 0 + fp1 = mol.linear_fingerprint(number_bit_pairs=2) + fp2 = mol.linear_fingerprint(number_bit_pairs=3) + + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) def test_linear_fingerprint_edge_cases(): - # Test single atom - mol_single = smiles('C') - fp_single = mol_single.linear_fingerprint() - assert isinstance(fp_single, np.ndarray) - assert fp_single.dtype == np.uint8 - assert fp_single.sum() > 0 # should have some bits set - - # Test disconnected components - mol_disconnected = smiles('CC.CC') - fp_disconnected = mol_disconnected.linear_fingerprint() - assert isinstance(fp_disconnected, np.ndarray) - assert fp_disconnected.dtype == np.uint8 + fp1 = smiles('C').linear_fingerprint() + assert fp1.sum() == 2 + + fp1 = smiles('CC').linear_fingerprint() + fp2 = smiles('CC.CC').linear_fingerprint() + assert not 
np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) def test_linear_fingerprint_arbitrary_length(): # Test that non-power-of-2 lengths work but might have unexpected behavior mol = smiles('CCO') lengths = [100, 1000, 1500, 3000] - + for length in lengths: fp = mol.linear_fingerprint(length=length) assert isinstance(fp, np.ndarray) @@ -135,22 +111,22 @@ def test_linear_fingerprint_comparison(): mol1 = smiles('CCO') mol2 = smiles('CCC') mol3 = smiles('CCCO') - + fp1 = mol1.linear_fingerprint() fp2 = mol2.linear_fingerprint() fp3 = mol3.linear_fingerprint() - + # Calculate Tanimoto similarities def tanimoto(a, b): intersection = np.sum(np.logical_and(a, b)) union = np.sum(np.logical_or(a, b)) return intersection / union if union > 0 else 0.0 - + # Similar molecules should have higher similarity sim12 = tanimoto(fp1, fp2) sim13 = tanimoto(fp1, fp3) sim23 = tanimoto(fp2, fp3) - + assert 0 <= sim12 <= 1 assert 0 <= sim13 <= 1 - assert 0 <= sim23 <= 1 \ No newline at end of file + assert 0 <= sim23 <= 1 From cd29ef760a3bb9c03a5a5dd39e906eb68f8f0c7f Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 29 Jan 2025 22:37:26 +0100 Subject: [PATCH 61/68] tokenizer test refactored --- chython/files/daylight/test/test_tokenize.py | 120 ++++++++----------- 1 file changed, 53 insertions(+), 67 deletions(-) diff --git a/chython/files/daylight/test/test_tokenize.py b/chython/files/daylight/test/test_tokenize.py index b5b42d34..3532813a 100644 --- a/chython/files/daylight/test/test_tokenize.py +++ b/chython/files/daylight/test/test_tokenize.py @@ -17,75 +17,61 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -import pytest -from chython.exceptions import IncorrectSmiles -from chython.files.daylight.parser import parser from chython.files.daylight.tokenize import smiles_tokenize, smarts_tokenize - - -def test_smiles_tokenize_atoms(): - # Test basic atom tokenization - tokens = list(smiles_tokenize('C')) - assert len(tokens) == 1 - assert isinstance(tokens[0], tuple) - assert len(tokens[0]) == 2 - assert isinstance(tokens[0][1], dict) - assert tokens[0][1].get('element') == 'C' - - -def test_smiles_tokenize_bonds(): - # Test bond tokenization - tokens = list(smiles_tokenize('C=O')) - assert len(tokens) == 3 - assert tokens[1][0] == 1 # bond index - assert tokens[1][1] == 2 # double bond - - -def test_smiles_tokenize_branches(): - # Test branch tokenization - tokens = list(smiles_tokenize('C(O)N')) - assert len(tokens) == 5 - assert tokens[1][0] == 2 # branch start index - assert tokens[3][0] == 3 # branch end index - - -def test_smiles_tokenize_cycles(): - # Test cycle tokenization - tokens = list(smiles_tokenize('C1CCC1')) - assert len(tokens) == 6 - assert tokens[1][0] == 6 # cycle number - - -def test_smiles_tokenize_charges(): - # Test charge tokenization - tokens = list(smiles_tokenize('[NH4+]')) - assert len(tokens) == 1 # NH4+ as a single token - assert tokens[0][1].get('charge') == 1 # positive charge - assert tokens[0][1].get('element') == 'N' # nitrogen - assert tokens[0][1].get('hydrogen') == 4 # 4 hydrogens - - -def test_smarts_tokenize_basic(): +from chython.containers import QueryBond +from chython.exceptions import IncorrectSmiles +from pytest import raises + + +def test_smiles_tokenize(): + assert smiles_tokenize('C') == [(0, {'element': 'C'})] + assert smiles_tokenize('CC') == [(0, {'element': 'C'}), (0, {'element': 'C'})] + assert smiles_tokenize('C=O') == [(0, {'element': 'C'}), (1, 2), (0, {'element': 'O'})] + assert smiles_tokenize('C(O)N') == [(0, {'element': 'C'}), (2, None), (0, {'element': 'O'}), + (3, None), (0, {'element': 'N'})] + assert 
smiles_tokenize('C2CC2') == [(0, {'element': 'C'}), (6, 2), (0, {'element': 'C'}), + (0, {'element': 'C'}), (6, 2)] + + +def test_smiles_tokenize_atom(): + assert smiles_tokenize('[NH4+]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': 1, + 'implicit_hydrogens': 4, 'stereo': None})] + assert smiles_tokenize('[14N]') == [(0, {'element': 'N', 'isotope': 14, 'parsed_mapping': None, 'charge': 0, + 'implicit_hydrogens': 0, 'stereo': None})] + assert smiles_tokenize('[N@H]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': 0, + 'implicit_hydrogens': 1, 'stereo': True})] + assert smiles_tokenize('[N@@H--]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': -2, + 'implicit_hydrogens': 1, 'stereo': False})] + assert smiles_tokenize('[N@+3]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': 3, + 'implicit_hydrogens': 0, 'stereo': True})] + assert smiles_tokenize('[CH2:2]') == [(0, {'element': 'C', 'isotope': None, 'parsed_mapping': 2, 'charge': 0, + 'implicit_hydrogens': 2, 'stereo': None})] + with raises(IncorrectSmiles): + smiles_tokenize('[@N]') + + +def test_smarts_tokenize_atom(): # Test basic SMARTS tokenization - tokens = list(smarts_tokenize('[C]')) - assert len(tokens) == 1 # just C - assert tokens[0][1].get('element') == 'C' + assert smarts_tokenize('[C]') == [(0, {'element': 'C'})] + assert smarts_tokenize('[C,N]') == [(0, {'element': ['C', 'N']})] + assert smarts_tokenize('[C+]') == [(0, {'charge': 1, 'element': 'C'})] + assert smarts_tokenize('[#1]') == [(0, {'element': 1})] + assert smarts_tokenize('[C;h1;@]') == [(0, {'element': 'C', 'implicit_hydrogens': [1], 'stereo': True})] + assert smarts_tokenize('[O;z1,z2;x1]') == [(0, {'element': 'O', 'heteroatoms': [1], 'hybridization': [1, 2]})] + assert smarts_tokenize('[Se;a;D1,D2;r4,r7:3]') == [(0, {'parsed_mapping': 3, 'element': 'Se', 'hybridization': 4, 'neighbors': [1, 2], 'ring_sizes': [4, 7]})] + 
assert smarts_tokenize('[Cl;M]') == [(0, {'element': 'Cl', 'masked': True})] + assert smarts_tokenize('[A:1]') == [(0, {'parsed_mapping': 1, 'element': 'A'})] + assert smarts_tokenize('[M]') == [(0, {'element': 'M'})] def test_smarts_tokenize_bonds(): - # Test bond primitives - tokens = list(smarts_tokenize('CC')) - assert len(tokens) == 2 # C, C - assert tokens[0][1].get('element') == 'C' - assert tokens[1][1].get('element') == 'C' - - -# Special cases test commented out due to unpredictable behavior -# def test_tokenize_special_cases(): -# # Test empty string -# with pytest.raises(IncorrectSmiles, match='invalid smiles'): -# list(smiles_tokenize('')) # empty string should raise IncorrectSmiles -# -# # Test whitespace -# with pytest.raises(IncorrectSmiles, match='invalid smiles'): -# list(smiles_tokenize(' ')) # whitespace should raise IncorrectSmiles + assert smarts_tokenize('[C][C]') == [(0, {'element': 'C'}), (0, {'element': 'C'})] + assert smarts_tokenize('[C]-[C]') == [(0, {'element': 'C'}), (1, 1), (0, {'element': 'C'})] + assert smarts_tokenize('[C]~[C]') == [(0, {'element': 'C'}), (1, 8), (0, {'element': 'C'})] + assert smarts_tokenize('[C]!:[C]') == [(0, {'element': 'C'}), (10, [1, 2, 3]), (0, {'element': 'C'})] + assert smarts_tokenize('[C]-,=[C]') == [(0, {'element': 'C'}), (10, [1, 2]), (0, {'element': 'C'})] + assert smarts_tokenize('[C]-;@[C]') == [(0, {'element': 'C'}), (12, QueryBond(1, True)), (0, {'element': 'C'})] + assert smarts_tokenize('[C]!-;!@[C]') == [(0, {'element': 'C'}), (12, QueryBond((2, 3, 4), False)), + (0, {'element': 'C'})] + assert smarts_tokenize('[C]-,=;!@[C]') == [(0, {'element': 'C'}), (12, QueryBond((1, 2), False)), + (0, {'element': 'C'})] From 5b08b9397c58c6d12d2876d0a7f64d08eb987db0 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 2 Feb 2025 13:50:47 +0100 Subject: [PATCH 62/68] fingerprint tests refactored --- .../fingerprints/test/test_linear.py | 35 +-- .../fingerprints/test/test_morgan.py | 297 ++---------------- 2 
files changed, 49 insertions(+), 283 deletions(-) diff --git a/chython/algorithms/fingerprints/test/test_linear.py b/chython/algorithms/fingerprints/test/test_linear.py index b4407435..d0accfc8 100644 --- a/chython/algorithms/fingerprints/test/test_linear.py +++ b/chython/algorithms/fingerprints/test/test_linear.py @@ -19,9 +19,10 @@ # import numpy as np from chython import smiles +from pytest import mark, raises -def test_linear_fingerprint_basic(): +def test_basic(): # Test basic fingerprint generation mol = smiles('CCO') fp = mol.linear_fingerprint(min_radius=1, max_radius=2, length=1024) @@ -41,7 +42,15 @@ def test_linear_fingerprint_basic(): assert fp.shape == (2048,) -def test_linear_fingerprint_consistency(): +@mark.parametrize('radius', [(0, 1), (1, 0), (-1, 2), (2, 1)]) +def test_invalid_radius(radius): + mol = smiles('CCO') + min_r, max_r = radius + with raises(AssertionError): + mol.morgan_fingerprint(min_radius=min_r, max_radius=max_r) + + +def test_consistency(): # Test that fingerprints are consistent for the same molecule fp1 = smiles('CCO').linear_fingerprint() fp2 = smiles('OCC').linear_fingerprint() @@ -54,7 +63,7 @@ def test_linear_fingerprint_consistency(): assert not np.array_equal(fp1, fp3) -def test_linear_fingerprint_parameters(): +def test_parameters(): mol = smiles('CCO') # Test different radius parameters @@ -70,7 +79,7 @@ def test_linear_fingerprint_parameters(): assert np.array_equal(fp3 & fp4, fp3) -def test_linear_fingerprint_bit_pairs(): +def test_bit_pairs(): # Test the number_bit_pairs parameter mol = smiles('CCCCCCCCCCCCCCCCCCCCCCCC') # molecule with multiple similar fragments @@ -82,7 +91,7 @@ def test_linear_fingerprint_bit_pairs(): assert np.array_equal(fp1 & fp2, fp1) -def test_linear_fingerprint_edge_cases(): +def test_edge_cases(): fp1 = smiles('C').linear_fingerprint() assert fp1.sum() == 2 @@ -92,21 +101,7 @@ def test_linear_fingerprint_edge_cases(): assert np.array_equal(fp1 & fp2, fp1) -def 
test_linear_fingerprint_arbitrary_length(): - # Test that non-power-of-2 lengths work but might have unexpected behavior - mol = smiles('CCO') - lengths = [100, 1000, 1500, 3000] - - for length in lengths: - fp = mol.linear_fingerprint(length=length) - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (length,) - # The actual bits set might be fewer than expected due to masking - assert 0 <= fp.sum() <= length - - -def test_linear_fingerprint_comparison(): +def test_comparison(): # Test fingerprint comparison between similar molecules mol1 = smiles('CCO') mol2 = smiles('CCC') diff --git a/chython/algorithms/fingerprints/test/test_morgan.py b/chython/algorithms/fingerprints/test/test_morgan.py index c6407e9f..5b32e21f 100644 --- a/chython/algorithms/fingerprints/test/test_morgan.py +++ b/chython/algorithms/fingerprints/test/test_morgan.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,292 +17,63 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from chython import smiles, ReactionContainer +from chython import smiles import numpy as np -from pytest import mark -import pytest +from pytest import mark, raises -def test_morgan_fingerprint(): +def test_basic(): # Test basic fingerprint generation mol = smiles('CCO') fp = mol.morgan_fingerprint(min_radius=1, max_radius=2, length=1024) - + + # Test array properties assert isinstance(fp, np.ndarray) assert fp.dtype == np.uint8 assert fp.shape == (1024,) - assert fp.sum() > 0 # Should have some bits set - - # Test different lengths - fp2 = mol.morgan_fingerprint(length=2048) - assert fp2.shape == (2048,) - - # Test different number of active bits - fp3 = mol.morgan_fingerprint(number_active_bits=3) - assert fp3.sum() >= fp.sum() # Should have more or equal bits set - - -def test_morgan_bit_set(): - mol = smiles('CCO') - bits = mol.morgan_bit_set(min_radius=1, max_radius=2, length=1024) - - assert isinstance(bits, set) - assert len(bits) > 0 - assert all(isinstance(x, int) for x in bits) - assert all(0 <= x < 1024 for x in bits) - - # Test with different parameters - bits2 = mol.morgan_bit_set(length=2048, number_active_bits=3) - assert all(0 <= x < 2048 for x in bits2) - assert len(bits2) >= len(bits) # Should have more or equal bits - - -def test_morgan_hash_set(): - mol = smiles('CCO') - hashes = mol.morgan_hash_set(min_radius=1, max_radius=2) - - assert isinstance(hashes, set) - assert len(hashes) > 0 - assert all(isinstance(x, int) for x in hashes) - - -def test_morgan_hash_smiles(): - mol = smiles('CCO') - hash_smiles = mol.morgan_hash_smiles(min_radius=1, max_radius=2) - - assert isinstance(hash_smiles, dict) - assert len(hash_smiles) > 0 - assert all(isinstance(k, int) for k in hash_smiles) - assert all(isinstance(v, list) for v in hash_smiles.values()) - assert all(isinstance(s, str) for v in hash_smiles.values() for s in v) + # Test binary nature + assert set(np.unique(fp)).issubset({0, 1}) -def test_morgan_smiles_hash(): - mol = smiles('CCO') - 
smiles_hash = mol.morgan_smiles_hash(min_radius=1, max_radius=2) - - assert isinstance(smiles_hash, dict) - assert len(smiles_hash) > 0 - assert all(isinstance(k, str) for k in smiles_hash) - assert all(isinstance(v, list) for v in smiles_hash.values()) - assert all(isinstance(h, int) for v in smiles_hash.values() for h in v) + # Test different lengths + fp = mol.morgan_fingerprint(length=2048) + assert isinstance(fp, np.ndarray) + assert fp.dtype == np.uint8 + assert fp.shape == (2048,) -@mark.parametrize('radius', [(0, 1), (1, 0), (-1, 2)]) +@mark.parametrize('radius', [(0, 1), (1, 0), (-1, 2), (2, 1)]) def test_invalid_radius(radius): mol = smiles('CCO') min_r, max_r = radius - try: + with raises(AssertionError): mol.morgan_fingerprint(min_radius=min_r, max_radius=max_r) - assert False, "Should raise AssertionError" - except AssertionError: - pass - - -def test_complex_molecule(): - # Test with a more complex molecule containing rings and multiple atom types - mol = smiles('c1ccccc1CC(=O)O') - - fp1 = mol.morgan_fingerprint(min_radius=1, max_radius=3) - fp2 = mol.morgan_fingerprint(min_radius=1, max_radius=4) - - assert fp1.sum() < fp2.sum() # More radius should capture more features - - # Test hash consistency - hash_set1 = mol.morgan_hash_set(min_radius=1, max_radius=2) - hash_set2 = mol.morgan_hash_set(min_radius=1, max_radius=2) - assert hash_set1 == hash_set2 # Should be deterministic - - -def test_morgan_fingerprint_numpy(): - # Test numpy array properties of Morgan fingerprints - mol = smiles('CCO') - fp = mol.morgan_fingerprint(min_radius=1, max_radius=2, length=1024) - - # Test array type and shape - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (1024,) - - # Test binary nature - assert set(np.unique(fp)).issubset({0, 1}) - - # Test different lengths - fp_2048 = mol.morgan_fingerprint(length=2048) - assert fp_2048.shape == (2048,) - assert fp_2048.dtype == np.uint8 - - # Test different number of active bits - 
fp_more_bits = mol.morgan_fingerprint(number_active_bits=4) - assert fp_more_bits.sum() >= fp.sum() -def test_morgan_fingerprint_consistency(): +def test_consistency(): # Test that fingerprints are consistent for the same molecule - mol = smiles('CCO') - fp1 = mol.morgan_fingerprint() - fp2 = mol.morgan_fingerprint() - + fp1 = smiles('CCO').morgan_fingerprint() + fp2 = smiles('OCC').morgan_fingerprint() + # Test exact equality of arrays - np.testing.assert_array_equal(fp1, fp2) - - # Test different molecules give different fingerprints - mol2 = smiles('CCC') - fp3 = mol2.morgan_fingerprint() + assert np.array_equal(fp1, fp2) + + # Test different molecules give different fingerprints + fp3 = smiles('CCC').morgan_fingerprint() assert not np.array_equal(fp1, fp3) -def test_morgan_fingerprint_parameters(): +def test_parameters(): mol = smiles('CCO') - + # Test different radius parameters fp1 = mol.morgan_fingerprint(min_radius=1, max_radius=2) fp2 = mol.morgan_fingerprint(min_radius=1, max_radius=3) - assert fp2.sum() >= fp1.sum() # More radius should capture more features - - # Test power of 2 lengths - for length in [128, 256, 512, 1024, 2048, 4096]: - fp = mol.morgan_fingerprint(length=length) - assert fp.shape == (length,) - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - - -def test_morgan_fingerprint_arbitrary_length(): - # Test that non-power-of-2 lengths work but might have unexpected behavior - mol = smiles('CCO') - lengths = [100, 1000, 1500, 3000] - - for length in lengths: - fp = mol.morgan_fingerprint(length=length) - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (length,) - # The actual bits set might be fewer than expected due to masking - assert 0 <= fp.sum() <= length - - -def test_complex_molecule_clean2d(): - # Test with more complex molecule - mol = smiles('c1ccccc1CC(=O)O') - mol.clean2d() - - # Check coordinates exist - assert all(n in mol._plane for n in mol) - - # Verify ring atoms are
roughly coplanar - ring_atoms = [n for n in mol if len(mol._bonds[n]) == 2] - if ring_atoms: - coords = [mol._plane[n] for n in ring_atoms] - # Calculate variance in y coordinates - should be small for planar ring - y_coords = [y for _, y in coords] - y_mean = sum(y_coords) / len(y_coords) - y_variance = sum((y - y_mean) ** 2 for y in y_coords) / len(y_coords) - assert y_variance < 1.0 # reasonable threshold for planarity - - -def test_disconnected_components(): - # Test molecule with multiple disconnected components - mol = smiles('CCO.c1ccccc1') - mol.clean2d() - - # Check all atoms have coordinates - assert all(n in mol._plane for n in mol) - - # Components should be separated in space - components = list(mol.connected_components) - assert len(components) == 2 - - # Get bounding boxes for each component - def get_bounds(atoms): - xs = [mol._plane[n][0] for n in atoms] - ys = [mol._plane[n][1] for n in atoms] - return min(xs), max(xs), min(ys), max(ys) - - bounds1 = get_bounds(components[0]) - bounds2 = get_bounds(components[1]) - - # Check components don't overlap in x-direction - assert bounds1[1] < bounds2[0] or bounds2[1] < bounds1[0] - - -def test_reaction_clean2d(): - # Create a simple reaction - reactant = smiles('CCO') - product = smiles('CC=O') - reaction = ReactionContainer([reactant], [product]) - - # Clean coordinates - reaction.clean2d() - - # Check that all molecules have coordinates - for molecule in reaction.molecules(): - assert all(n in molecule._plane for n in molecule) - - # Check that reactants are positioned before products - reactant_max_x = max(x for mol in reaction.reactants - for x, _ in mol._plane.values()) - product_min_x = min(x for mol in reaction.products - for x, _ in mol._plane.values()) - assert reactant_max_x < product_min_x - - # Check arrow exists and is positioned between reactants and products - assert hasattr(reaction, '_arrow') - arrow_start, arrow_end = reaction._arrow - assert reactant_max_x < arrow_start < arrow_end < 
product_min_x - - -def test_reaction_with_reagents(): - # Create reaction with reagents - reactant = smiles('CCO') - reagent = smiles('Cl') - product = smiles('CCCl') - reaction = ReactionContainer([reactant], [reagent], [product]) - - reaction.clean2d() - - # Check all molecules have coordinates - for molecule in reaction.molecules(): - assert all(n in molecule._plane for n in molecule) - - # Check reagents are positioned above the arrow - reagent_coords = [(x, y) for mol in reaction.reagents - for x, y in mol._plane.values()] - assert all(y > 0 for _, y in reagent_coords) # reagents should be above - - # Verify arrow position - arrow_start, arrow_end = reaction._arrow - assert arrow_start < arrow_end - - # Check relative positioning - reactant_max_x = max(x for mol in reaction.reactants - for x, _ in mol._plane.values()) - product_min_x = min(x for mol in reaction.products - for x, _ in mol._plane.values()) - assert reactant_max_x < arrow_start < arrow_end < product_min_x - - -def test_fix_positions(): - # Test just the position fixing functionality - reaction = ReactionContainer([smiles('CCO')], [smiles('CC=O')]) - - # Clean individual molecules first - for mol in reaction.molecules(): - mol.clean2d() - - # Then fix positions - reaction.fix_positions() - - # Check arrow exists - assert hasattr(reaction, '_arrow') - - # Check molecules are properly spaced - reactant_coords = [(x, y) for mol in reaction.reactants - for x, y in mol._plane.values()] - product_coords = [(x, y) for mol in reaction.products - for x, y in mol._plane.values()] - - # Verify no overlap between reactants and products - reactant_max_x = max(x for x, _ in reactant_coords) - product_min_x = min(x for x, _ in product_coords) - assert reactant_max_x < product_min_x \ No newline at end of file + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) + + # Test number of active bits + fp3 = mol.morgan_fingerprint(number_active_bits=2) + fp4 = 
mol.morgan_fingerprint(number_active_bits=3) + assert not np.array_equal(fp3, fp4) + assert np.array_equal(fp3 & fp4, fp3) From 4512e4edc3a49a5e23d1ae4ed8f7e620ae7ac5db Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 2 Feb 2025 15:45:56 +0100 Subject: [PATCH 63/68] isomorhism tests refactored. partially covered only mol to mol isomorphism --- chython/algorithms/mapping/test/__init__.py | 18 --- .../algorithms/mapping/test/test_mapping.py | 128 ------------------ chython/algorithms/test/test_isomorphism.py | 58 ++++++++ 3 files changed, 58 insertions(+), 146 deletions(-) delete mode 100644 chython/algorithms/mapping/test/__init__.py delete mode 100644 chython/algorithms/mapping/test/test_mapping.py create mode 100644 chython/algorithms/test/test_isomorphism.py diff --git a/chython/algorithms/mapping/test/__init__.py b/chython/algorithms/mapping/test/__init__.py deleted file mode 100644 index 031c963a..00000000 --- a/chython/algorithms/mapping/test/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# diff --git a/chython/algorithms/mapping/test/test_mapping.py b/chython/algorithms/mapping/test/test_mapping.py deleted file mode 100644 index b77c823e..00000000 --- a/chython/algorithms/mapping/test/test_mapping.py +++ /dev/null @@ -1,128 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from chython import smiles - - -def test_basic_mapping(): - # Test basic atom mapping in simple molecules - mol1 = smiles('CC(=O)O') # acetic acid - mol2 = smiles('CC(=O)O') # acetic acid - - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) # all atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_substructure_mapping(): - # Test mapping of a substructure - mol = smiles('CC(=O)OC') # methyl acetate - substructure = smiles('CC(=O)O') # acetic acid pattern - - mappings = list(substructure.get_mapping(mol)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(substructure) # all substructure atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_multiple_mappings(): - # Test cases where multiple valid mappings exist - mol = smiles('CC(=O)CC(=O)C') # 2,4-pentanedione - pattern = smiles('CC(=O)C') # acetone pattern - - mappings = list(pattern.get_mapping(mol)) - assert len(mappings) > 1 # should find multiple matches - assert all(len(m) == len(pattern) for m in mappings) # each mapping should cover all pattern atoms - - -def test_aromatic_mapping(): - # Test mapping with aromatic systems - benzene = smiles('c1ccccc1') - toluene = smiles('Cc1ccccc1') - - mappings = list(benzene.get_mapping(toluene)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(benzene) # all benzene atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_reaction_mapping(): - # Test mapping in reaction context - reactant = smiles('CC(=O)O') # acetic acid - product = smiles('CC(=O)OC') # methyl 
acetate - - mappings = list(reactant.get_mapping(product)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(reactant) # all reactant atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_complex_mapping(): - # Test mapping with complex molecules - mol1 = smiles('CC1=C(C(=O)C2=C(C1=O)N3CC4=C(C3(CC2)C)NC5=CC=CC=C54)C') # complex structure - mol2 = smiles('CC1=C(C(=O)C2=C(C1=O)N3CC4=C(C3(CC2)C)NC5=CC=CC=C54)C') # same structure - - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) # all atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_mapping_with_different_bonds(): - # Test mapping when bond orders differ - mol1 = smiles('C=CC=C') # 1,3-butadiene - mol2 = smiles('C=CC=C') # 1,3-butadiene - - # Should find mapping for identical molecules - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) - - # Verify that the mapping preserves atom connectivity and bond orders - for atom1, atom2 in mapping.items(): - # Check that the number of neighbors is the same - assert len(mol1._bonds[atom1]) == len(mol2._bonds[atom2]) - # Check that bond orders are preserved - mol1_orders = {mol1._bonds[atom1][x].order for x in mol1._bonds[atom1]} - mol2_orders = {mol2._bonds[atom2][x].order for x in mol2._bonds[atom2]} - assert mol1_orders == mol2_orders - - -def test_mapping_with_charges(): - # Test mapping with charged atoms - mol1 = smiles('C[NH3+]') # methylammonium - mol2 = smiles('C[NH3+]') # methylammonium - - # Should find mapping for identical molecules - mappings = list(mol1.get_mapping(mol2)) - 
assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) - - # Verify that the mapping preserves atom connectivity and charges - for atom1, atom2 in mapping.items(): - assert mol1._bonds[atom1].keys() == mol2._bonds[atom2].keys() - assert mol1._charges[atom1] == mol2._charges[atom2] \ No newline at end of file diff --git a/chython/algorithms/test/test_isomorphism.py b/chython/algorithms/test/test_isomorphism.py new file mode 100644 index 00000000..4cb2c141 --- /dev/null +++ b/chython/algorithms/test/test_isomorphism.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from chython import smiles + + +def test_basic(): + # Test basic atom mapping in simple molecules + mol1 = smiles('CC(=O)O') # acetic acid + mol2 = smiles('CC(O)=O') # acetic acid + + assert mol1 <= mol2 + assert mol2 <= mol1 + + mappings = list(mol1.get_mapping(mol2)) + assert len(mappings) == 1 + assert mappings[0] == {1: 1, 2: 2, 3: 4, 4: 3} + assert not smiles('CC(O)O') <= mol1 + assert smiles('C[O-]') <= smiles('CC[O-]') + assert not smiles('C[O-]') <= smiles('CCO') + + +def test_substructure_mapping(): + # Test mapping of a substructure + mol = smiles('CCC(=O)OC') + substructure = smiles('CC(=O)O') + + assert substructure < mol + mappings = list(substructure.get_mapping(mol)) + assert len(mappings) == 1 + assert mappings[0] == {1: 2, 2: 3, 3: 4, 4: 5} + + +def test_multiple_mappings(): + # Test cases where multiple valid mappings exist + mol = smiles('CC(=O)OC(=O)C') + pattern = smiles('CC(=O)O') # acetone pattern + + mappings = list(pattern.get_mapping(mol)) + assert len(mappings) == 2 # should find multiple matches + assert {1: 1, 2: 2, 3: 3, 4: 4} in mappings + assert {1: 7, 2: 5, 3: 6, 4: 4} in mappings From ff105b01a9bf36c0241874fd026d9f401fc51def Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 2 Feb 2025 15:50:39 +0100 Subject: [PATCH 64/68] dropped outdated python versions. no need for multipython tests. 
--- .github/workflows/tests.yml | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6d4a04c6..9c44817e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,39 +13,15 @@ jobs: matrix: include: # Linux builds (required) - - os: ubuntu-latest - python-version: "3.8" - extra_deps: "importlib_resources" - experimental: false - - os: ubuntu-latest - python-version: "3.9" - extra_deps: "" - experimental: false - os: ubuntu-latest python-version: "3.10" extra_deps: "" experimental: false - - os: ubuntu-latest - python-version: "3.11" - extra_deps: "" - experimental: false # Windows builds (allowed to fail) - - os: windows-latest - python-version: "3.8" - extra_deps: "importlib_resources" - experimental: true - - os: windows-latest - python-version: "3.9" - extra_deps: "" - experimental: true - os: windows-latest python-version: "3.10" extra_deps: "" experimental: true - - os: windows-latest - python-version: "3.11" - extra_deps: "" - experimental: true runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} @@ -152,10 +128,10 @@ jobs: retention-days: 7 - name: Upload coverage to Codecov - if: runner.os == 'Linux' && matrix.python-version == '3.11' + if: runner.os == 'Linux' && matrix.python-version == '3.10' uses: codecov/codecov-action@v3 with: file: test-results/coverage.xml flags: unittests name: codecov-umbrella - fail_ci_if_error: false \ No newline at end of file + fail_ci_if_error: false From c1081faa48e3ce0261580f44023320242dff9382 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 8 Feb 2025 19:20:12 +0100 Subject: [PATCH 65/68] smiles generation tests refactored --- chython/algorithms/test/test_smiles.py | 150 ++++++------------------- 1 file changed, 33 insertions(+), 117 deletions(-) diff --git a/chython/algorithms/test/test_smiles.py b/chython/algorithms/test/test_smiles.py index 6e54a022..b61606ae 100644 --- 
a/chython/algorithms/test/test_smiles.py +++ b/chython/algorithms/test/test_smiles.py @@ -22,139 +22,55 @@ def test_basic_smiles(): # Test basic SMILES generation - mol = smiles('CCO') # ethanol - assert 'C' in str(mol) and 'O' in str(mol) # check presence of atoms - + mol = smiles('CO') # methanol + assert str(mol) in ('CO', 'OC') + mol = smiles('c1ccccc1') # benzene - assert 'c1ccccc1' in str(mol) # aromatic representation + assert str(mol) == 'c1ccccc1' def test_format_options(): # Test different format options - mol = smiles('c1ccccc1') - + mol = smiles('C=1C=CC=CC=1') + # Test asymmetric closures - assert mol.__format__('a').startswith('c') - + assert str(mol) == 'C=1C=CC=CC=1' + assert format(mol, 'a') == 'C=1C=CC=CC1' + + assert format(mol, '!b') == 'C1CCCCC1' + # Test disable stereo - chiral_mol = smiles('C[C@H](O)CC') - assert '@' not in chiral_mol.__format__('!s') - - # Test aromatic bonds - kekulized = mol.__format__('A') - assert 'c' not in kekulized # should not contain aromatic atoms - - # Test atom mapping - assert ':' in mol.__format__('m') # atom mapping numbers present - - # Test random ordering - mol_str = str(mol) - random_smiles = mol.__format__('r') - assert len(random_smiles) > 0 # valid SMILES generated - - -def test_smiles_atoms_order(): - # Test atoms order property - mol = smiles('CCO') - order = mol.smiles_atoms_order - assert isinstance(order, tuple) - assert len(order) == 3 # number of atoms - assert all(isinstance(x, int) for x in order) + mol = smiles('C[C@H](O)CC') + assert '@' in str(mol) + assert '@' not in format(mol, '!s') + mol = smiles('c1ccccc1') + assert format(mol, 'A') == 'C:1:C:C:C:C:C:1' + assert format(mol, 'Aa') == 'C:1:C:C:C:C:C1' + assert format(mol, 'm') == '[cH:1]1[cH:2][cH:3][cH:4][cH:5][cH:6]1' + assert format(mol, 'h') == '[cH]1[cH][cH][cH][cH][cH]1' + + assert format(mol, 'Ah') == '[CH]:1:[CH]:[CH]:[CH]:[CH]:[CH]:1' + assert format(mol, 'Ah!b') == '[CH]1[CH][CH][CH][CH][CH]1' + + mol = smiles('[K+]') + assert 
str(mol) == '[K+]' + assert format(mol, '!z') == '[K]' -def test_molecule_smiles(): - # Test MoleculeSmiles specific functionality - mol = smiles('CCO') - atoms = list(mol._atoms.keys()) # get actual atom indices - - # Test sticky smiles generation - sticky = mol.sticky_smiles(atoms[0]) # fix first atom - assert sticky and isinstance(sticky, str) - - # Test sticky smiles with both ends - sticky_both = mol.sticky_smiles(atoms[0], atoms[-1]) # fix first and last atoms - assert sticky_both and isinstance(sticky_both, str) - - -def test_complex_structures(): - # Test complex molecular structures - mol = smiles('C1CC(=O)NC(=O)C1') # cyclic peptide - assert all(x in str(mol) for x in ('C', 'N', '=O')) # check for expected fragments - - mol = smiles('C[C@H](N)C(=O)O') # amino acid - assert '@' in str(mol) # stereo information preserved - - -def test_charged_species(): - # Test charged molecules - mol = smiles('[NH4+]') # ammonium - assert '+' in str(mol) - - mol = smiles('[OH-]') # hydroxide - assert '-' in str(mol) - - -def test_radical_species(): - # Test radical species mol = smiles('[CH3]') - assert '[' in str(mol) and ']' in str(mol) # bracketed form - - # Test with format options - assert '[' in mol.__format__('h') # show hydrogens - - -def test_cgr_smiles(): - # Test CGR SMILES functionality - mol = smiles('CC>>CCC') # dynamic transformation - assert '>' in str(mol) - - # Test dynamic bonds - mol = smiles('C=C>>CC') - assert '=' in str(mol) - - -def test_query_smiles(): - # Test basic query atoms - mol = smiles('[C]') # carbon atom - assert len(mol) == 1 - - mol = smiles('[N]') # nitrogen atom - assert len(mol) == 1 - - mol = smiles('[O]') # oxygen atom - assert len(mol) == 1 - - mol = smiles('[H]') # hydrogen atom - assert len(mol) == 1 + assert str(mol) == '[CH3] |^1:0|' + assert format(mol, '!x') == '[CH3]' + + mol = smiles('CCO') + assert len({format(mol, 'r') for _ in range(50)}) == 4 def test_smiles_comparison(): # Test SMILES comparison functionality mol1 = 
smiles('CCO') - mol2 = smiles('CCO') + mol2 = smiles('OCC') mol3 = smiles('CCC') - + assert mol1 == mol2 # same molecules assert mol1 != mol3 # different molecules assert hash(mol1) == hash(mol2) # same hash for same molecules - - -def test_cxsmiles_extensions(): - # Test CXSMILES extensions - mol = smiles('[CH3]') # radical - assert mol.smiles # valid SMILES generated - - # Test without CXSMILES - assert mol.__format__('!x') # valid SMILES without extensions - - -def test_special_cases(): - # Test special cases and edge cases - mol = smiles('[H][H]') # hydrogen molecule - assert '[H]' in str(mol) - - mol = smiles('C#N') # triple bond - assert '#' in str(mol) - - mol = smiles('C~C') # any bond - assert '~' in str(mol) \ No newline at end of file From ddddc5a5fb9c27528c4362348936ea1c08dc05a4 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 8 Feb 2025 19:22:12 +0100 Subject: [PATCH 66/68] converter to rdkit mol fixed --- chython/utils/rdkit.py | 44 +++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/chython/utils/rdkit.py b/chython/utils/rdkit.py index bae12fd9..b159d18d 100644 --- a/chython/utils/rdkit.py +++ b/chython/utils/rdkit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -138,24 +138,35 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): ra.SetNumRadicalElectrons(1) mapping[n] = mol.AddAtom(ra) + inverted = {v: k for k, v in mapping.items()} + for n, m, b in data.bonds(): if atoms[n].atomic_symbol not in _inorganic: n, m = m, n # fix direction of dative bond mol.AddBond(mapping[n], mapping[m], _bond_map[b.order]) - for n in data._atoms_stereo: + for n, a in data.atoms(): + if a.stereo is None: + continue + if n not in data.stereogenic_tetrahedrons: + continue # allenes are not supported ra = mol.GetAtomWithIdx(mapping[n]) - env = bonds[n] - s = data._translate_tetrahedron_sign(n, [x for x in mapping if x in env]) + env = [inverted[x.GetIdx()] for x in ra.GetNeighbors()] + s = data._translate_tetrahedron_sign(n, env) ra.SetChiralTag(_chiral_ccw if s else _chiral_cw) - for nm, s in data._cis_trans_stereo.items(): - n, m = nm - if m in bonds[n]: # cumulenes unsupported - nn, nm, *_ = data.stereogenic_cis_trans[nm] - b = mol.GetBondBetweenAtoms(mapping[n], mapping[m]) - b.SetStereoAtoms(mapping[nn], mapping[nm]) - b.SetStereo(_cis if s else _trans) + for n, m, b in data.bonds(): + if b.stereo is None: + continue + # check for simple cis-trans + nm = data._stereo_cis_trans_centers.get(n) + if nm is None or n not in nm or m not in nm: + continue + + n1, m1, *_ = data.stereogenic_cis_trans[nm] + rb = mol.GetBondBetweenAtoms(mapping[n], mapping[m]) + rb.SetStereoAtoms(mapping[n1], mapping[m1]) + rb.SetStereo(_cis if b.stereo else _trans) conf = Conformer() for n, a in data.atoms(): @@ -163,11 +174,12 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): conf.Set3D(False) mol.AddConformer(conf, assignId=True) - for c in data._conformers: - conf = Conformer() - for n, xyz in c.items(): - conf.SetAtomPosition(mapping[n], xyz) - mol.AddConformer(conf, assignId=True) + if hasattr(data, '_conformers'): + for c in data._conformers: + 
conf = Conformer() + for n, xyz in c.items(): + conf.SetAtomPosition(mapping[n], xyz) + mol.AddConformer(conf, assignId=True) SanitizeMol(mol) AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True) From 4a2343d5b592cb52c5b2b7d4ce99c7ae8de3cb93 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sun, 9 Feb 2025 13:43:10 +0100 Subject: [PATCH 67/68] reactor fixed. minimal tests added. test pipeline dropped. doesn't work at all. --- .github/workflows/tests.yml | 137 ----------------------- chython/reactor/base.py | 58 +++++----- chython/reactor/reactor.py | 56 ++++----- chython/reactor/test/test_reactor.py | 40 +++++++ chython/reactor/test/test_transformer.py | 40 +++++++ 5 files changed, 139 insertions(+), 192 deletions(-) delete mode 100644 .github/workflows/tests.yml create mode 100644 chython/reactor/test/test_reactor.py create mode 100644 chython/reactor/test/test_transformer.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 9c44817e..00000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,137 +0,0 @@ -name: Tests - -on: - push: - branches: [ main, master ] - pull_request: - branches: [ main, master ] - -jobs: - test: - strategy: - fail-fast: false - matrix: - include: - # Linux builds (required) - - os: ubuntu-latest - python-version: "3.10" - extra_deps: "" - experimental: false - # Windows builds (allowed to fail) - - os: windows-latest - python-version: "3.10" - extra_deps: "" - experimental: true - - runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} - - steps: - - uses: actions/checkout@v3 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' # Cache pip dependencies - - - name: Install system dependencies (Linux) - if: runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install -y build-essential gcc g++ gfortran - gcc --version # Print 
version for debugging - - - name: Install system dependencies (Windows) - if: runner.os == 'Windows' - run: | - choco install visualstudio2019buildtools --package-parameters "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64" - - - name: Install Cython and core dependencies - run: | - python -m pip install --upgrade pip - pip install "cython>=3.0.5" setuptools wheel - if [ "${{ matrix.extra_deps }}" != "" ]; then - pip install ${{ matrix.extra_deps }} - fi - shell: bash - - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: 1.7.1 - virtualenvs-create: true - virtualenvs-in-project: true - - - name: Configure Poetry (Windows) - if: runner.os == 'Windows' - run: | - $env:Path += ";$env:APPDATA\Python\Scripts" - poetry config installer.max-workers 4 - shell: pwsh - - - name: Configure Poetry (Linux) - if: runner.os == 'Linux' - run: | - poetry config installer.max-workers 4 - shell: bash - - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - path: .venv - key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('**/*.pyx') }} - restore-keys: | - venv-${{ runner.os }}-${{ matrix.python-version }}- - - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --with dev - env: - PATH: ${{ env.PATH }};${{ env.APPDATA }}\Python\Scripts - timeout-minutes: 15 - - - name: Clean Cython build - shell: bash - run: | - rm -rf build/ - rm -rf *.so - find . -name "*.c" -delete || true - - - name: Force Cython recompilation - run: poetry run pip install -e . 
--no-deps --force-reinstall - - - name: Install test dependencies - run: | - poetry run pip install pytest-html pytest-cov - shell: bash - - - name: Run tests - run: | - mkdir -p test-results - poetry run pytest tests/ -v --durations=10 --cache-clear \ - --junitxml=test-results/junit.xml \ - --html=test-results/report.html \ - --cov=chython \ - --cov-report=html:test-results/coverage \ - --cov-report=xml:test-results/coverage.xml - - - name: Upload test results - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-results-${{ matrix.os }}-py${{ matrix.python-version }} - path: | - test-results/ - .pytest_cache/ - retention-days: 7 - - - name: Upload coverage to Codecov - if: runner.os == 'Linux' && matrix.python-version == '3.10' - uses: codecov/codecov-action@v3 - with: - file: test-results/coverage.xml - flags: unittests - name: codecov-umbrella - fail_ci_if_error: false diff --git a/chython/reactor/base.py b/chython/reactor/base.py index ca39685a..65a5966e 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2024 Ramil Nugmanov +# Copyright 2014-2025 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. 
# @@ -29,6 +29,8 @@ def __init__(self, pattern, replacement, delete_atoms, fix_rings, fix_tautomers) for n, a in replacement.atoms(): if not isinstance(a, (AnyElement, QueryElement)): raise TypeError('Unsupported query atom type') + elif len(a.implicit_hydrogens) > 1: + raise ValueError('Query element in patch has more than one implicit hydrogen clause') for *_, b in replacement.bonds(): if len(b.order) > 1: raise ValueError('Variable bond in replacement') @@ -70,14 +72,14 @@ def _get_deleted(self, structure, mapping): return to_delete def _patcher(self, structure: MoleculeContainer, mapping): - atoms = structure._atoms - bonds = structure._bonds + satoms = structure._atoms + sbonds = structure._bonds to_delete = self._get_deleted(structure, mapping) new = structure.__class__() natoms = new._atoms nbonds = new._bonds - max_atom = max(atoms) + max_atom = max(satoms) stereo_atoms = [] stereo_bonds = [] @@ -87,7 +89,7 @@ def _patcher(self, structure: MoleculeContainer, mapping): if isinstance(ra, AnyElement): if m := mapping.get(n): # keep matched atom type and isotope - sa = atoms[m] + sa = satoms[m] a = sa.copy() a.charge = ra.charge a.is_radical = ra.is_radical @@ -103,21 +105,16 @@ def _patcher(self, structure: MoleculeContainer, mapping): a = e(ra.isotope, charge=ra.charge, is_radical=ra.is_radical) if not (m := mapping.get(n)): # new atom m = max_atom + 1 - max_atom += 1 - mapping[n] = m + mapping[n] = max_atom = m a._stereo = ra.stereo # keep stereo from patch for new atoms if isinstance(ra, Element): a._implicit_hydrogens = ra.implicit_hydrogens # keep H count from patch - a.x = ra.x # keep coordinates from patch - a.y = ra.y - elif len(ra.implicit_hydrogens) == 1: # keep H count from patch + a.xy = ra.xy # keep coordinates from patch + elif ra.implicit_hydrogens: # keep H count from patch a._implicit_hydrogens = ra.implicit_hydrogens[0] - elif ra.implicit_hydrogens: - raise ValueError('Query element in patch has more than one implicit hydrogen') else: # 
existing atoms - sa = atoms[m] - a.x = sa.x # preserve existing coordinates - a.y = sa.y + sa = satoms[m] + a.xy = sa.xy # preserve existing coordinates if ra.stereo is not None: a._stereo = ra.stereo elif sa.stereo is not None: # keep original stereo @@ -136,25 +133,26 @@ def _patcher(self, structure: MoleculeContainer, mapping): nbonds[n][m] = b = Bond(int(rb)) if rb.stereo is not None: # override stereo b._stereo = rb.stereo - elif (sbn := bonds.get(n)) is None or (sb := sbn.get(m)) is None or sb.stereo is None: + # check bond exists in source and has stereo label and the same order + elif (sbn := sbonds.get(n)) is None or (sb := sbn.get(m)) is None or sb.stereo is None or sb != b: continue else: # original structure has stereo bond stereo_bonds.append((n, m)) patched_atoms = set(new) - for n, sa in atoms.items(): # add unmatched or masked atoms + for n, sa in satoms.items(): # add unmatched or masked atoms if n not in patched_atoms and n not in to_delete: natoms[n] = a = sa.copy(hydrogens=True) nbonds[n] = {} if sa.stereo is not None: # in case of allenes label can disappear/change, thus, requires recalculation # for tetrahedrons label can be stored as is - if len(bonds[n]) >= 3: + if n in structure.stereogenic_tetrahedrons: a._stereo = sa.stereo else: stereo_atoms.append(n) - for n, bs in bonds.items(): # preserve connectivity order for keeping stereo labels as is + for n, bs in sbonds.items(): # preserve connectivity order for keeping stereo labels as is if n in to_delete: # atoms for removing continue for m, b in bs.items(): @@ -177,23 +175,29 @@ def _patcher(self, structure: MoleculeContainer, mapping): # translate stereo sign from old order to new order for n in stereo_atoms: if n in new.stereogenic_tetrahedrons: - if bonds[n].keys() == nbonds[n].keys(): + if sbonds[n].keys() == nbonds[n].keys(): # flush stereo from reaction center. should be explicitly set in replacement. 
- s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) + s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], satoms[n].stereo) natoms[n]._stereo = s elif n in new.stereogenic_allenes: if set(new.stereogenic_allenes[n]) == set(structure.stereogenic_allenes[n]): # flush stereo for changed allene substituents - s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) + s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], satoms[n].stereo) natoms[n]._stereo = s # else: ignore label for n, m in stereo_bonds: - if (t12 := new._stereo_cis_trans_terminals.get(n, True)) == new._stereo_cis_trans_terminals.get(m, False): - if set(new.stereogenic_cis_trans[t12]) == set(env := structure.stereogenic_cis_trans[t12]): - # connected to cumulenes atoms should be the same - s = new._translate_cis_trans_sign(*t12, *env[:2], bonds[n][m].stereo) - nbonds[n][m]._stereo = s + # check if bond is center of cumulene + if (n12 := new._stereo_cis_trans_terminals.get(n, True)) != new._stereo_cis_trans_terminals.get(m, False): + continue + s12 = structure._stereo_cis_trans_terminals[n] + # check if cumulene terminals are the same + if set(n12) != set(s12): + continue + if set(new.stereogenic_cis_trans[n12]) == set(env := structure.stereogenic_cis_trans[s12]): + # connected to cumulenes atoms should be the same + s = new._translate_cis_trans_sign(*n12, *env[:2], sbonds[n][m].stereo) + nbonds[n][m]._stereo = s # else: ignore label if self._fix_rings: diff --git a/chython/reactor/reactor.py b/chython/reactor/reactor.py index ce74a6d1..ab633227 100644 --- a/chython/reactor/reactor.py +++ b/chython/reactor/reactor.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. 
# @@ -62,29 +62,28 @@ def __init__(self, patterns: Tuple[QueryContainer, ...], raise TypeError('invalid params') elif not all(isinstance(x, (QueryContainer, MoleculeContainer)) for x in products): raise TypeError('invalid params') - self.patterns = patterns - self.products = products + self._patterns = patterns + self._products = products - self.__one_shot = one_shot - self.__polymerise_limit = polymerise_limit - self.__products_atoms = tuple(set(m) for m in products) - self.__automorphism_filter = automorphism_filter - super().__init__({n for x in patterns for n, a in x.atoms() if not a.masked}, reduce(or_, products), - delete_atoms, fix_aromatic_rings, fix_tautomers) + self._one_shot = one_shot + self._polymerise_limit = polymerise_limit + self._products_atoms = tuple(set(m) for m in products) + self._automorphism_filter = automorphism_filter + super().__init__(reduce(or_, patterns), reduce(or_, products), delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, *structures: MoleculeContainer): if any(not isinstance(structure, MoleculeContainer) for structure in structures): raise TypeError('only list of Molecules possible') - len_patterns = len(self.patterns) + len_patterns = len(self._patterns) structures = fix_mapping_overlap(structures) s_nums = set(range(len(structures))) seen = set() - if self.__one_shot: + if self._one_shot: for chosen in permutations(s_nums, len_patterns): ignored = [structures[x] for x in s_nums.difference(chosen)] chosen = [structures[x] for x in chosen] - for new in self.__single_stage(chosen, {x for x in ignored for x in x}): + for new in self._single_stage(chosen, {x for x in ignored for x in x}): # store reacted molecules in same order as matched pattern r = ReactionContainer([x.copy() for x in chosen] + [x.copy() for x in ignored], new + [x.copy() for x in ignored]) @@ -100,14 +99,14 @@ def __call__(self, *structures: MoleculeContainer): while queue: chosen, ignored, depth = queue.popleft() depth += 1 - for new in 
self.__single_stage(chosen, {x for x in ignored for x in x}): + for new in self._single_stage(chosen, {x for x in ignored for x in x}): r = ReactionContainer([x.copy() for x in structures], new + [x.copy() for x in ignored]) if len(new) > 1: r.contract_ions() # try to keep salts if str(r) in seen: continue seen.add(str(r)) - if len(r.products) != len(ignored) + len(self.__products_atoms): + if len(r.products) != len(ignored) + len(self._products_atoms): logger.info('ambiguous multicomponent structures. skip multistage processing') yield r continue @@ -116,7 +115,7 @@ def __call__(self, *structures: MoleculeContainer): else: seen.add(str(r)) - if depth < self.__polymerise_limit: + if depth < self._polymerise_limit: prod = r.products if len_patterns == 1: # simple case. only products or ignored can be transformed. for i in range(len(prod)): @@ -128,26 +127,26 @@ def __call__(self, *structures: MoleculeContainer): queue.append((ch, [*prod[:i], *prod[i + 1:]], depth)) yield r - def __single_stage(self, chosen, ignored) -> Iterator[List[MoleculeContainer]]: + def _single_stage(self, chosen, ignored) -> Iterator[List[MoleculeContainer]]: max_ignored_number = united_chosen = None - split = len(self.__products_atoms) > 1 - for match in lazy_product(*(x.get_mapping(y, automorphism_filter=self.__automorphism_filter) for x, y in - zip(self.patterns, chosen))): + split = len(self._products_atoms) > 1 + for match in lazy_product(*(x.get_mapping(y, automorphism_filter=self._automorphism_filter) for x, y in + zip(self._patterns, chosen))): mapping = match[0].copy() for m in match[1:]: mapping.update(m) if united_chosen is None: united_chosen = reduce(or_, chosen) max_ignored_number = max(ignored, default=0) - for new in self._patcher(united_chosen, mapping): - collision = set(new).intersection(ignored) - if collision: - new.remap(dict(zip(collision, count(max(max_ignored_number, max(new)) + 1)))) + new = self._patcher(united_chosen, mapping) + collision = 
set(new).intersection(ignored) + if collision: + new.remap(dict(zip(collision, count(max(max_ignored_number, max(new)) + 1)))) - if split: - yield new.split() - else: - yield [new] + if split: + yield new.split() + else: + yield [new] def fix_mapping_overlap(structures) -> List[MoleculeContainer]: @@ -159,7 +158,8 @@ def fix_mapping_overlap(structures) -> List[MoleculeContainer]: intersection = set(structure).intersection(checked_atoms) if intersection: mapping = dict(zip(intersection, count(max(max(checked_atoms), max(structure)) + 1))) - structure = structure.remap(mapping, copy=True) + structure = structure.copy() + structure.remap(mapping) logger.info('some atoms in input structures had the same numbers.\n' f'atoms {list(mapping)} were remapped to {list(mapping.values())}') checked_atoms.update(structure) diff --git a/chython/reactor/test/test_reactor.py b/chython/reactor/test/test_reactor.py new file mode 100644 index 00000000..37b4268e --- /dev/null +++ b/chython/reactor/test/test_reactor.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from chython import smiles, smarts, Reactor +from pytest import mark + + +data = [ + (('[B;D3;x2;z1:4]([O:5])([O:6])-[C;@@;h1:3]1([O;M][C;M]1)', '[Cl,Br,I;D1:1]-[C;a:2]'), ('[A;@:3]-[A:2]',), + ('CC1O[C@@H]1B(O)O', 'Brc1ccccc1'), ('CC1O[C@H]1c1ccccc1',)), # inverse stereo check + (('[B;D3;x2;z1:4]([O:5])([O:6])-[C;@@;h1:3]1([O;M][C;M]1)', '[Cl,Br,I;D1:1]-[C;a:2]'), ('[A;@@:3]-[A:2]',), + ('CC1O[C@@H]1B(O)O', 'Brc1ccccc1'), ('CC1O[C@@H]1c1ccccc1',)), # keep stereo on RC + (('[B;D3;x2;z1:4]([O:5])([O:6])-[C;@@;h1:3]1([O;M][C;M]1)', '[Cl,Br,I;D1:1]-[C;a:2]'), ('[A:3]-[A:2]',), + ('CC1O[C@@H]1B(O)O', 'Brc1ccccc1'), ('CC1OC1c1ccccc1',)), # drop stereo on RC +] + + +@mark.parametrize('patterns, products, source, result', data) +def test_transformer(patterns, products, source, result): + for q, m in zip(patterns, source): + assert smarts(q) <= smiles(m) + + reactor = Reactor([smarts(x) for x in patterns], [smarts(x) for x in products]) + out = {format(smiles(x), 'h') for x in result} + assert {format(x, 'h') for x in next(reactor(*(smiles(x) for x in source))).products} == out diff --git a/chython/reactor/test/test_transformer.py b/chython/reactor/test/test_transformer.py new file mode 100644 index 00000000..c6b57b9c --- /dev/null +++ b/chython/reactor/test/test_transformer.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from chython import smiles, smarts, Transformer +from pytest import mark + + +data = [ + ('[C:1]Br', '[A:1][O;M]', 'C[C@H](OC)CBr', 'C[C@H](OC)CO'), # keep stereo out of match + ('[C:2][C:1]Br', '[A:2][A:1][O;M]', 'C[C@H](OC)CBr', 'C[C@H](OC)CO'), # keep stereo inside match + ('[C;M][C;@;h1:1]([O;M])[N;M]', '[A;@@:1]', 'CC[C@H](O)N', 'CC[C@@H](O)N'), # inversion of stereo + ('[C:1]Br', '[A:1][O;M]', 'C/C=C/CBr', 'C/C=C/CO'), # keep stereo out of match + ('[C:1]Br', '[A:1][O;M]', 'CC=[C@]=CCBr', 'CC=[C@]=CCO'), # keep + ('[C:1]Br', '[A:1][O;M]', 'CC=[C@]=CBr', 'CC=C=CO'), # drop stereo on RC + ('[C:1]Br', '[A:1][O;M]', 'C/C=C/Br', 'CC=CO'), # drop stereo on RC +] + + +@mark.parametrize('pattern, replacement, source, result', data) +def test_transformer(pattern, replacement, source, result): + transformer = Transformer(smarts(pattern), smarts(replacement)) + + mol = smiles(source) + out = {format(smiles(x), 'h') for x in ([result] if isinstance(result, str) else result)} + assert {format(x, 'h') for x in transformer(mol)} == out From c9609920b30b5b7430bc52cb92ce0be0585725ad Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sun, 9 Feb 2025 16:34:59 +0100 Subject: [PATCH 68/68] rdkit converter fixed. tests added. 
--- chython/utils/rdkit.py | 126 +++++++++++-------------------- chython/utils/test/__init__.py | 18 +++++ chython/utils/test/test_rdkit.py | 71 +++++++++++++++++ 3 files changed, 134 insertions(+), 81 deletions(-) create mode 100644 chython/utils/test/__init__.py create mode 100644 chython/utils/test/test_rdkit.py diff --git a/chython/utils/rdkit.py b/chython/utils/rdkit.py index b159d18d..aa8a17a9 100644 --- a/chython/utils/rdkit.py +++ b/chython/utils/rdkit.py @@ -18,7 +18,6 @@ # from rdkit.Chem import AssignStereochemistry, Atom, BondStereo, BondType, ChiralType, Conformer, RWMol, SanitizeMol from ..containers import MoleculeContainer -from ..exceptions import IsChiral, NotChiral, ValenceError from ..periodictable import Element @@ -27,89 +26,55 @@ def from_rdkit_molecule(data): RDKit molecule object to MoleculeContainer converter """ mol = MoleculeContainer() - parsed_mapping = mol._parsed_mapping - mol_conformers = mol._conformers - bonds = mol._bonds - atoms, mapping = [], [] + mapping = {} tetrahedron_stereo = [] - for a in data.GetAtoms(): - e = Element.from_symbol(a.GetSymbol()) - isotope = a.GetIsotope() - if isotope: - e = e(isotope) - else: - e = e() - atom = {'atom': e, 'charge': a.GetFormalCharge()} - - radical = a.GetNumRadicalElectrons() - if radical: - atom['is_radical'] = True - - atoms.append(atom) - mapping.append(a.GetAtomMapNum()) - tetrahedron_stereo.append(a.GetChiralTag()) - - conformers = [] - c = data.GetConformers() - if c: - for atom, (x, y, _) in zip(atoms, c[0].GetPositions()): - atom['xy'] = (x, y) - for c in c: - if c.Is3D(): - conformers.append(c.GetPositions()) - - new_map = [] - for a, n in zip(atoms, mapping): - a = mol.add_atom(**a) - new_map.append(a) - parsed_mapping[a] = n - - stereo = [] + for ra in data.GetAtoms(): + e = Element.from_symbol(ra.GetSymbol()) + a = e(ra.GetIsotope() or None, charge=ra.GetFormalCharge(), is_radical=bool(ra.GetNumRadicalElectrons()), + parsed_mapping=ra.GetAtomMapNum(), 
implicit_hydrogens=ra.GetNumExplicitHs() + ra.GetNumImplicitHs()) + mapping[ra.GetIdx()] = mol.add_atom(a, _skip_calculation=True) + s = ra.GetChiralTag() + if s in (_chiral_cw, _chiral_ccw): + tetrahedron_stereo.append((ra.GetIdx(), [x.GetIdx() for x in ra.GetNeighbors()], s == _chiral_ccw)) + + cis_trans_stereo = [] for b in data.GetBonds(): - n, m = new_map[b.GetBeginAtomIdx()], new_map[b.GetEndAtomIdx()] - mol.add_bond(n, m, _rdkit_bond_map[b.GetBondType()]) + n, m = mapping[b.GetBeginAtomIdx()], mapping[b.GetEndAtomIdx()] + mol.add_bond(n, m, _rdkit_bond_map[b.GetBondType()], _skip_calculation=True) s = b.GetStereo() - if s == _cis: + if s in (_cis, _trans): nn, nm = b.GetStereoAtoms() - stereo.append((mol.add_cis_trans_stereo, n, m, new_map[nn], new_map[nm], True)) - elif s == _trans: - nn, nm = b.GetStereoAtoms() - stereo.append((mol.add_cis_trans_stereo, n, m, new_map[nn], new_map[nm], False)) - - for n, s in zip(new_map, tetrahedron_stereo): - if s == _chiral_cw: - env = bonds[n] - env = [x for x in new_map if x in env] - stereo.append((mol.add_atom_stereo, n, env, False)) - elif s == _chiral_ccw: - env = bonds[n] - env = [x for x in new_map if x in env] - stereo.append((mol.add_atom_stereo, n, env, True)) - - while stereo: - fail_stereo = [] - old_stereo = len(stereo) - for f, *args in stereo: - try: - f(*args, clean_cache=False) - except NotChiral: - fail_stereo.append((f, *args)) - except IsChiral: - pass - except ValenceError: - mol.flush_cache() - break - else: - stereo = fail_stereo - if len(stereo) == old_stereo: - break - mol.flush_stereo_cache() - continue - break + cis_trans_stereo.append((n, m, mapping[nn], mapping[nm], s == _cis)) - for c in conformers: - mol_conformers.append({k: tuple(v) for k, v in zip(new_map, c)}) + if cs := data.GetConformers(): + # set coordinates from the first rdkit conformer. 
usually it's 2d layout + for (_, atom), (x, y, _) in zip(mol.atoms(), cs[0].GetPositions()): + atom.xy = (x, y) + + conformers = [] + for c in cs: + if c.Is3D(): + conformers.append({n: tuple(v) for n, v in enumerate(c.GetPositions(), 1)}) + if conformers: + mol._conformers = conformers + + # move stereo labels as is + for n, env, s in tetrahedron_stereo: + n = mapping[n] + try: + mol.atom(n)._stereo = mol._translate_tetrahedron_sign(n, [mapping[x] for x in env], s) + except KeyError: + pass + for n, m, nn, nm, s in cis_trans_stereo: + try: + mol.bond(n, m)._stereo = mol._translate_cis_trans_sign(n, m, nn, nm, s) + except KeyError: + pass + + mol.fix_structure(recalculate_hydrogens=False) + if tetrahedron_stereo or cis_trans_stereo: + mol.fix_stereo() return mol @@ -123,11 +88,10 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): """ mol = RWMol() mapping = {} - atoms = data._atoms - bonds = data._bonds for n, a in data.atoms(): ra = Atom(a.atomic_number) + ra.SetNumExplicitHs(a.implicit_hydrogens) if keep_mapping: ra.SetAtomMapNum(n) if a.charge: @@ -141,7 +105,7 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): inverted = {v: k for k, v in mapping.items()} for n, m, b in data.bonds(): - if atoms[n].atomic_symbol not in _inorganic: + if data.atom(n).atomic_symbol not in _inorganic: n, m = m, n # fix direction of dative bond mol.AddBond(mapping[n], mapping[m], _bond_map[b.order]) diff --git a/chython/utils/test/__init__.py b/chython/utils/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/utils/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# diff --git a/chython/utils/test/test_rdkit.py b/chython/utils/test/test_rdkit.py new file mode 100644 index 00000000..5af40af0 --- /dev/null +++ b/chython/utils/test/test_rdkit.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from chython import smiles +from chython.utils.rdkit import * +from pytest import mark +from rdkit import Chem +from rdkit.Chem import AllChem + + +data = [ + 'CCO', + 'C/C=C/C', + 'C[C@H](O)CC', + 'C\C=C/O[C@@H]1OC[C@@H](Oc2ccccc2)[C@@H](O)[C@H]1O\C=C\C', + '[nH]1cccc1', + 'C\C=C\C=C', + 'C[C@@H](O)[C@H](O)[C@H](C)O' +] + +@mark.parametrize('source', data) +def test_to_rdkit(source): + mol = smiles(source) + rd_mol = to_rdkit_molecule(mol, keep_mapping=False) + rd_mol_mapping = to_rdkit_molecule(mol, keep_mapping=True) + + assert format(smiles(Chem.MolToSmiles(rd_mol)), 'h') == format(mol, 'h') + assert format(smiles(Chem.MolToSmiles(rd_mol_mapping)), 'm') == format(mol, 'm') + + +@mark.parametrize('source', data) +def test_from_rdkit(source): + assert format(from_rdkit_molecule(Chem.MolFromSmiles(source)), 'h') == format(smiles(source), 'h') + + +def test_coordinates(): + rd_mol = to_rdkit_molecule(smiles('CCO'), keep_mapping=False) + + AllChem.Compute2DCoords(rd_mol) + mol = from_rdkit_molecule(rd_mol) + assert any(a.x for _, a in mol.atoms()) + + rd_mol_h = Chem.AddHs(rd_mol) + AllChem.EmbedMolecule(rd_mol_h) + rd_mol_nh = Chem.RemoveHs(rd_mol_h) + + mol = from_rdkit_molecule(rd_mol_nh) + assert hasattr(mol, '_conformers') + assert isinstance(mol._conformers, list) + assert len(mol._conformers) == 1 + assert isinstance(mol._conformers[0], dict) + assert len(mol._conformers[0]) == 3 + assert all(tuple(x) for x in mol._conformers[0].values()) + assert all(len(x) == 3 for x in mol._conformers[0].values()) + assert all(isinstance(x, float) for x in mol._conformers[0].values() for x in x) + assert any(x for x in mol._conformers[0].values() for x in x)