From 983e5b6e890accd7fe9153c661314a4c49cef8d9 Mon Sep 17 00:00:00 2001 From: reginaib Date: Thu, 31 Oct 2024 13:05:57 +0100 Subject: [PATCH 01/51] periodic table refactored. --- chython/periodictable/__init__.py | 14 +- .../{element => base}/__init__.py | 9 +- .../{element => base}/dynamic.py | 56 +- .../{element => base}/element.py | 152 ++++-- chython/periodictable/{ => base}/groups.py | 2 +- chython/periodictable/{ => base}/periods.py | 2 +- chython/periodictable/base/query.py | 479 ++++++++++++++++++ chython/periodictable/element/core.py | 118 ----- chython/periodictable/element/query.py | 318 ------------ chython/periodictable/groupI.py | 8 +- chython/periodictable/groupII.py | 8 +- chython/periodictable/groupIII.py | 8 +- chython/periodictable/groupIV.py | 8 +- chython/periodictable/groupIX.py | 8 +- chython/periodictable/groupV.py | 8 +- chython/periodictable/groupVI.py | 8 +- chython/periodictable/groupVII.py | 8 +- chython/periodictable/groupVIII.py | 8 +- chython/periodictable/groupX.py | 6 +- chython/periodictable/groupXI.py | 6 +- chython/periodictable/groupXII.py | 8 +- chython/periodictable/groupXIII.py | 8 +- chython/periodictable/groupXIV.py | 6 +- chython/periodictable/groupXV.py | 6 +- chython/periodictable/groupXVI.py | 6 +- chython/periodictable/groupXVII.py | 6 +- chython/periodictable/groupXVIII.py | 8 +- 27 files changed, 700 insertions(+), 582 deletions(-) rename chython/periodictable/{element => base}/__init__.py (71%) rename chython/periodictable/{element => base}/dynamic.py (73%) rename chython/periodictable/{element => base}/element.py (79%) rename chython/periodictable/{ => base}/groups.py (95%) rename chython/periodictable/{ => base}/periods.py (93%) create mode 100644 chython/periodictable/base/query.py delete mode 100644 chython/periodictable/element/core.py delete mode 100644 chython/periodictable/element/query.py diff --git a/chython/periodictable/__init__.py b/chython/periodictable/__init__.py index 304f6e44..5f272d31 100644 --- 
a/chython/periodictable/__init__.py +++ b/chython/periodictable/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2021 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,9 +17,9 @@ # along with this program; if not, see . # from abc import ABCMeta -from .element import * -from .groups import * -from .periods import * +from .base import * +from .base.groups import * +from .base.periods import * from .groupI import * from .groupII import * from .groupIII import * @@ -51,9 +51,9 @@ for _class in (DynamicElement, QueryElement): for k, v in elements.items(): name = f'{_class.__name__[:-7]}{k}' - globals()[name] = cls = type(name, (_class, *v.__mro__[-3:-1]), - {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number, - 'atomic_radius': v.atomic_radius}) + globals()[name] = cls = type(name, + (_class, *v.__mro__[-3:-1]), + {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number}) setattr(modules[v.__module__], name, cls) modules[v.__module__].__all__.append(name) __all__.append(name) diff --git a/chython/periodictable/element/__init__.py b/chython/periodictable/base/__init__.py similarity index 71% rename from chython/periodictable/element/__init__.py rename to chython/periodictable/base/__init__.py index 1fecc8f4..f63b3bb6 100644 --- a/chython/periodictable/element/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov -# Copyright 2019 Tagir Akhmetshin -# Copyright 2019 Dayana Bashirova +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,10 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .core import * +from .dynamic import * from .element import * from .query import * -from .dynamic import * -__all__ = ['Core', 'Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/element/dynamic.py b/chython/periodictable/base/dynamic.py similarity index 73% rename from chython/periodictable/element/dynamic.py rename to chython/periodictable/base/dynamic.py index 70aaaabd..d0989547 100644 --- a/chython/periodictable/element/dynamic.py +++ b/chython/periodictable/base/dynamic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,20 +16,32 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from abc import ABC -from typing import Type, Union -from .core import Core +from abc import ABC, abstractmethod +from typing import Type, Union, Optional from .element import Element -from ...exceptions import IsNotConnectedAtom -class DynamicElement(Core, ABC): - __slots__ = ('__p_charge', '__p_is_radical') +class DynamicElement(ABC): + __slots__ = ('_charge', '_is_radical', '_p_charge', '_p_is_radical', '_isotope') + + def __init__(self, isotope: Optional[int]): + self._isotope = isotope + + @property + def isotope(self): + return self._isotope @property def atomic_symbol(self) -> str: return self.__class__.__name__[7:] + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + @classmethod def from_symbol(cls, symbol: str) -> Type['DynamicElement']: """ @@ -63,19 +75,21 @@ def from_atom(cls, atom: Union['Element', 'DynamicElement']) -> 'DynamicElement' raise TypeError('Element or DynamicElement expected') return atom.copy() + @property + def charge(self) -> int: + return self._charge + + @property + def is_radical(self) -> bool: + return self._is_radical + @property def p_charge(self) -> int: - try: - return self._graph()._p_charges[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._p_charge @property def p_is_radical(self) -> bool: - try: - return self._graph()._p_radicals[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._p_is_radical def __eq__(self, other): """ @@ -96,5 +110,17 @@ def is_dynamic(self) -> bool: """ return self.charge != self.p_charge or self.is_radical != self.p_is_radical + def copy(self): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._p_is_radical = self.p_is_radical + copy._p_charge = self.p_charge + return copy + + def __copy__(self): + return self.copy() + __all__ = ['DynamicElement'] diff --git a/chython/periodictable/element/element.py 
b/chython/periodictable/base/element.py similarity index 79% rename from chython/periodictable/element/element.py rename to chython/periodictable/base/element.py index 22a28386..c3703336 100644 --- a/chython/periodictable/element/element.py +++ b/chython/periodictable/base/element.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -20,12 +20,11 @@ from CachedMethods import class_cached_property from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Type -from .core import Core from ...exceptions import IsNotConnectedAtom, ValenceError -class Element(Core, ABC): - __slots__ = () +class Element(ABC): + __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens') __class_cache__ = {} def __init__(self, isotope: Optional[int] = None): @@ -39,12 +38,35 @@ def __init__(self, isotope: Optional[int] = None): raise ValueError(f'isotope number {isotope} impossible or not stable for {self.atomic_symbol}') elif isotope is not None: raise TypeError('integer isotope number required') - super().__init__(isotope) + self._isotope = isotope + self._charge = 0 + self._is_radical = False + self._x = self._y = 0 + self._implicit_hydrogens = None + + def __repr__(self): + if self._isotope: + return f'{self.__class__.__name__}({self._isotope})' + return f'{self.__class__.__name__}()' @property def atomic_symbol(self) -> str: return self.__class__.__name__ + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + + @property + def isotope(self) -> Optional[int]: + """ + Isotope number + """ + return self._isotope + @property def atomic_mass(self) -> float: mass = self.isotopes_masses @@ -73,72 +95,103 @@ def atomic_radius(self) -> float: Valence radius of atom """ - @Core.charge.setter - def charge(self, charge: int): - if 
not isinstance(charge, int): + @property + def charge(self) -> int: + """ + Charge of atom + """ + return self._charge + + @charge.setter + def charge(self, value: int): + """ + Update charge of atom. Make sure to flush cache and recalculate hydrogens count and stereo. + Or use context manager on molecule: + + with mol: + mol.atom(1).charge = 1 + """ + if not isinstance(value, int): raise TypeError('formal charge should be int in range [-4, 4]') - elif charge > 4 or charge < -4: + elif value > 4 or value < -4: raise ValueError('formal charge should be in range [-4, 4]') - try: - g = self._graph() - g._charges[self._n] = charge - except AttributeError: - raise IsNotConnectedAtom - else: - g._calc_implicit(self._n) - g.flush_cache() - g.fix_stereo() + self._charge = value + + @property + def is_radical(self) -> bool: + """ + Radical state of atoms + """ + return self._is_radical + + @is_radical.setter + def is_radical(self, value: bool): + """ + Update radical state of atom. Make sure to flush cache and recalculate hydrogens count and stereo. 
+ Or use context manager on molecule: - @Core.is_radical.setter - def is_radical(self, is_radical: bool): - if not isinstance(is_radical, bool): + with mol: + mol.atom(1).is_radical = True + """ + if not isinstance(value, bool): raise TypeError('bool expected') - try: - g = self._graph() - g._radicals[self._n] = is_radical - except AttributeError: - raise IsNotConnectedAtom - else: - g._calc_implicit(self._n) - g.flush_cache() - g.fix_stereo() + self._is_radical = value @property def x(self) -> float: """ X coordinate of atom on 2D plane """ - try: - return self._graph()._plane[self._n][0] - except AttributeError: - raise IsNotConnectedAtom + return self._x + + @x.setter + def x(self, value: float): + if not isinstance(value, float): + raise TypeError('float expected') + self._x = value @property def y(self) -> float: """ Y coordinate of atom on 2D plane """ - try: - return self._graph()._plane[self._n][1] - except AttributeError: - raise IsNotConnectedAtom + return self._y + + @y.setter + def y(self, value: float): + if not isinstance(value, float): + raise TypeError('float expected') + self._y = value @property def xy(self) -> Tuple[float, float]: """ (X, Y) coordinates of atom on 2D plane """ - try: - return self._graph()._plane[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._x, self._y + + @xy.setter + def xy(self, value: Tuple[float, float]): + if (not isinstance(value, (tuple, list)) + or len(value) != 2 + or not isinstance(value[0], float) + or not isinstance(value[1], float)): + raise TypeError('tuple of 2 floats expected') + self._x, self._y = value @property def implicit_hydrogens(self) -> Optional[int]: - try: - return self._graph()._hydrogens[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._implicit_hydrogens + + def copy(self): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + return copy + + def 
__copy__(self): + return self.copy() @property def explicit_hydrogens(self) -> int: @@ -149,10 +202,9 @@ def explicit_hydrogens(self) -> int: @property def total_hydrogens(self) -> int: - try: - return self._graph().total_hydrogens(self._n) - except AttributeError: - raise IsNotConnectedAtom + if self._implicit_hydrogens is None: + raise ValenceError + return self._implicit_hydrogens + self.explicit_hydrogens @property def heteroatoms(self) -> int: diff --git a/chython/periodictable/groups.py b/chython/periodictable/base/groups.py similarity index 95% rename from chython/periodictable/groups.py rename to chython/periodictable/base/groups.py index 912c9ae3..75809c61 100644 --- a/chython/periodictable/groups.py +++ b/chython/periodictable/base/groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/periods.py b/chython/periodictable/base/periods.py similarity index 93% rename from chython/periodictable/periods.py rename to chython/periodictable/base/periods.py index 2f3e6cba..f05e6d08 100644 --- a/chython/periodictable/periods.py +++ b/chython/periodictable/base/periods.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py new file mode 100644 index 00000000..5ae7adb5 --- /dev/null +++ b/chython/periodictable/base/query.py @@ -0,0 +1,479 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2024 Ramil Nugmanov +# Copyright 2021 Dmitrij Zanadvornykh +# This file is part of chython. 
+# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from abc import ABC, abstractmethod +from functools import cached_property +from typing import Tuple, Type, List, Union, Optional +from .element import Element + + +_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', + 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} + + +def _validate(value, prop): + if value is None: + return () + elif isinstance(value, int): + if value < 0 or value > 14: + raise ValueError(f'{prop} should be in range [0, 14]') + return (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(x, int) for x in value): + raise TypeError(f'{prop} should be list or tuple of ints') + if any(x < 0 or x > 14 for x in value): + raise ValueError(f'{prop} should be in range [0, 14]') + if len(set(value)) != len(value): + raise ValueError(f'{prop} should be unique') + return tuple(sorted(value)) + else: + raise TypeError(f'{prop} should be int or list or tuple of ints') + + +class Query(ABC): + __slots__ = ('_neighbors', '_hybridization', '_masked') + + def __init__(self): + self._neighbors = () + self._hybridization = () + self._masked = False + + @property + def neighbors(self) -> Tuple[int, ...]: + return self._neighbors + + @neighbors.setter + def neighbors(self, value): + self._neighbors = _validate(value, 'neighbors') + + @property + def hybridization(self) -> 
Tuple[int, ...]: + return self._hybridization + + @hybridization.setter + def hybridization(self, value): + if value is None: + self._hybridization = () + elif isinstance(value, int): + if value < 1 or value > 4: + raise ValueError('hybridization should be in range [1, 4]') + self._hybridization = (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(h, int) for h in value): + raise TypeError('hybridizations should be list or tuple of ints') + if any(h < 1 or h > 4 for h in value): + raise ValueError('hybridizations should be in range [1, 4]') + if len(set(value)) != len(value): + raise ValueError('hybridizations should be unique') + self._hybridization = tuple(sorted(value)) + else: + raise TypeError('hybridization should be int or list or tuple of ints') + + @property + def masked(self): + return self._masked + + @masked.setter + def masked(self, value): + if not isinstance(value, bool): + raise TypeError('masked should be bool') + self._masked = value + + def copy(self): + copy = object.__new__(self.__class__) + copy._neighbors = self.neighbors + copy._hybridization = self.hybridization + copy._masked = self.masked + return copy + + def __copy__(self): + return self.copy() + + def __repr__(self): + return f'{self.__class__.__name__}()' + + +class ExtendedQuery(Query, ABC): + __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens') + + def __init__(self): + super().__init__() + self._charge = 0 + self._is_radical = False + self._heteroatoms = () + self._ring_sizes = () + self._implicit_hydrogens = () + + @property + def charge(self) -> int: + """ + Charge of atom + """ + return self._charge + + @charge.setter + def charge(self, value: int): + if not isinstance(value, int): + raise TypeError('formal charge should be int in range [-4, 4]') + elif value > 4 or value < -4: + raise ValueError('formal charge should be in range [-4, 4]') + self._charge = value + + @property + def is_radical(self) -> bool: + """ + 
Radical state of atoms + """ + return self._is_radical + + @is_radical.setter + def is_radical(self, value: bool): + if not isinstance(value, bool): + raise TypeError('bool expected') + self._is_radical = value + + @property + def heteroatoms(self) -> Tuple[int, ...]: + return self._heteroatoms + + @heteroatoms.setter + def heteroatoms(self, value): + self._heteroatoms = _validate(value, 'heteroatoms') + + @property + def implicit_hydrogens(self) -> Tuple[int, ...]: + return self._implicit_hydrogens + + @implicit_hydrogens.setter + def implicit_hydrogens(self, value): + self._implicit_hydrogens = _validate(value, 'implicit hydrogens') + + @property + def ring_sizes(self) -> Tuple[int, ...]: + """ + Atom rings sizes. + """ + return self._ring_sizes + + @ring_sizes.setter + def ring_sizes(self, value): + if value is None: + self._ring_sizes = () + elif isinstance(value, int): + if value < 3 and value != 0: + raise ValueError('rings should be greater or equal 3. ring equal to zero is no ring atom mark') + self._ring_sizes = (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(x, int) for x in value): + raise TypeError('rings should be list or tuple of ints') + if any(x < 3 for x in value): + raise ValueError('rings should be greater or equal 3') + if len(set(value)) != len(value): + raise ValueError('rings should be unique') + self._ring_sizes = tuple(sorted(value)) + else: + raise TypeError('rings should be int or list or tuple of ints') + + def copy(self): + copy = super().copy() + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._heteroatoms = self.heteroatoms + copy._implicit_hydrogens = self.implicit_hydrogens + copy._ring_sizes = self.ring_sizes + return copy + + +class AnyMetal(Query): + """ + Charge and radical ignored any metal. Rings, hydrogens and heteroatoms count also ignored. + + Class designed for d-elements matching in standardization. 
+ """ + __slots__ = () + + @property + def atomic_symbol(self) -> str: + return 'M' + + def __eq__(self, other): + if isinstance(other, Element): + if other.atomic_symbol in _inorganic: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + return True + # metal is subset of metal. only + return (isinstance(other, AnyMetal) + and self.neighbors == other.neighbors + and self.hybridization == other.hybridization) + + def __hash__(self): + return hash((self.neighbors, self.hybridization)) + + +class AnyElement(ExtendedQuery): + __slots__ = () + + @property + def atomic_symbol(self) -> str: + return 'A' + + def __eq__(self, other): + """ + Compare attached to molecules elements and query elements + """ + if isinstance(other, Element): + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if set(self.ring_sizes).isdisjoint(other.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + # any is subset of any. 
only + return (isinstance(other, AnyElement) + and self.charge == other.charge + and self.is_radical == other.is_radical + and self.neighbors == other.neighbors + and self.hybridization == other.hybridization + and self.ring_sizes == other.ring_sizes + and self.implicit_hydrogens == other.implicit_hydrogens + and self.heteroatoms == other.heteroatoms) + + def __hash__(self): + return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, + self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) + + +class ListElement(ExtendedQuery): + __slots__ = ('_elements', '__dict__') + + def __init__(self, elements: List[str]): + """ + Elements list + """ + if not isinstance(elements, (list, tuple)) or not elements: + raise ValueError('invalid elements list') + super().__init__() + self._elements = tuple(elements) + + @property + def atomic_symbol(self) -> str: + return ','.join(self._elements) + + @cached_property + def atomic_numbers(self): + return tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in self._elements) + + def copy(self): + copy = super().copy() + copy._elements = self._elements + return copy + + def __eq__(self, other): + """ + Compare attached to molecules elements and query elements + """ + if isinstance(other, Element): + if other.atomic_number not in self.atomic_numbers: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if set(self.ring_sizes).isdisjoint(other.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: 
+ return False + return True + # List is subset of Any and List + elif (isinstance(other, (ListElement, AnyElement)) + and self.charge == other.charge + and self.is_radical == other.is_radical + and self.neighbors == other.neighbors + and self.hybridization == other.hybridization + and self.ring_sizes == other.ring_sizes + and self.implicit_hydrogens == other.implicit_hydrogens + and self.heteroatoms == other.heteroatoms): + # list should contain all elements of other list + if isinstance(other, ListElement): + return set(self.atomic_numbers).issubset(other.atomic_numbers) + return True + return False + + def __hash__(self): + return hash((self.atomic_numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, + self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) + + def __repr__(self): + return f'{self.__class__.__name__}([{",".join(self._elements)}])' + + +class QueryElement(ExtendedQuery, ABC): + __slots__ = ('_isotope',) + + def __init__(self, isotope: Optional[int]): + if isotope is not None and not isinstance(isotope, int): + raise TypeError('isotope must be an int') + super().__init__() + self._isotope = isotope + + def __repr__(self): + if self._isotope: + return f'{self.__class__.__name__}({self._isotope})' + return f'{self.__class__.__name__}()' + + @property + def atomic_symbol(self) -> str: + return self.__class__.__name__[5:] + + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + + @property + def isotope(self): + return self._isotope + + @classmethod + def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: + """ + get Element class by its symbol + """ + if symbol == 'A': + return AnyElement + elif symbol == 'M': + return AnyMetal + try: + element = next(x for x in QueryElement.__subclasses__() if x.__name__ == f'Query{symbol}') + except StopIteration: + raise ValueError(f'QueryElement with symbol "{symbol}" not found') + return element + + 
@classmethod + def from_atomic_number(cls, number: int) -> Type['QueryElement']: + """ + get Element class by its number + """ + try: + element = next(x for x in QueryElement.__subclasses__() if x.atomic_number.fget(None) == number) + except StopIteration: + raise ValueError(f'QueryElement with number "{number}" not found') + return element + + @classmethod + def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': + """ + get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement + """ + if isinstance(atom, Element): + # transfer true atomic props + query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) + query._charge = atom.charge + query._is_radical = atom.is_radical + return query + elif not isinstance(atom, Query): + raise TypeError('Element or Query expected') + return atom.copy() + + def copy(self): + copy = super().copy() + copy._isotope = self.isotope + return copy + + def __eq__(self, other): + """ + compare attached to molecules elements and query elements + """ + if isinstance(other, Element): + if self.atomic_number != other.atomic_number: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.isotope and self.isotope != other.isotope: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if set(self.ring_sizes).isdisjoint(other.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + elif (isinstance(other, ExtendedQuery) + and self.charge == other.charge + and self.is_radical == other.is_radical + and 
self.neighbors == other.neighbors + and self.hybridization == other.hybridization + and self.ring_sizes == other.ring_sizes + and self.implicit_hydrogens == other.implicit_hydrogens + and self.heteroatoms == other.heteroatoms): + # query element should fully match other query element + if isinstance(other, QueryElement): + return self.atomic_number == other.atomic_number and self.isotope == other.isotope + # query element is subset of any element + elif isinstance(other, AnyElement): + return True + # query element should be in list + return isinstance(other, ListElement) and self.atomic_number in other.atomic_numbers + return False + + def __hash__(self): + return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, + self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) + + +__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/element/core.py b/chython/periodictable/element/core.py deleted file mode 100644 index f5ab05ca..00000000 --- a/chython/periodictable/element/core.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2022 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from abc import ABC, abstractmethod -from typing import Optional, TypeVar -from weakref import ref -from ...exceptions import IsConnectedAtom, IsNotConnectedAtom - - -T = TypeVar('T') - - -class Core(ABC): - __slots__ = ('__isotope', '_graph', '_n') - - def __init__(self, isotope: Optional[int] = None): - self.__isotope = isotope - - def __repr__(self): - if self.__isotope: - return f'{self.__class__.__name__}({self.__isotope})' - return f'{self.__class__.__name__}()' - - def __getstate__(self): - return {'isotope': self.__isotope} - - def __setstate__(self, state): - self.__isotope = state['isotope'] - - @abstractmethod - def __hash__(self): - """ - Atom hash used in Morgan atom numbering algorithm. - """ - - @property - @abstractmethod - def atomic_symbol(self) -> str: - """ - Element symbol - """ - - @property - @abstractmethod - def atomic_number(self) -> int: - """ - Element number - """ - - @property - def isotope(self) -> Optional[int]: - """ - Isotope number - """ - return self.__isotope - - @property - def charge(self) -> int: - """ - Charge of atom - """ - try: - return self._graph()._charges[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def is_radical(self) -> bool: - """ - Radical state of atoms - """ - try: - return self._graph()._radicals[self._n] - except AttributeError: - raise IsNotConnectedAtom - - def copy(self: T) -> T: - """ - Detached from graph copy of element - """ - copy = object.__new__(self.__class__) - copy._Core__isotope = self.__isotope - return copy - - def _attach_graph(self, graph, n): - try: - self._graph - except AttributeError: - self._graph = ref(graph) - self._n = n - else: - raise IsConnectedAtom - - def _change_map(self, n): - try: - self._graph - except AttributeError: - raise IsNotConnectedAtom - else: - self._n = n - - -__all__ = ['Core'] diff --git a/chython/periodictable/element/query.py b/chython/periodictable/element/query.py deleted file mode 100644 index 94b9edca..00000000 --- 
a/chython/periodictable/element/query.py +++ /dev/null @@ -1,318 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2024 Ramil Nugmanov -# Copyright 2021 Dmitrij Zanadvornykh -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from abc import ABC -from typing import Tuple, Type, List, Union -from .core import Core -from .element import Element -from ...exceptions import IsNotConnectedAtom - - -_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', - 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} - - -class Query(Core, ABC): - __slots__ = () - - @property - def neighbors(self) -> Tuple[int, ...]: - try: - return self._graph()._neighbors[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def hybridization(self): - try: - return self._graph()._hybridizations[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def heteroatoms(self) -> Tuple[int, ...]: - try: - return self._graph()._heteroatoms[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def ring_sizes(self) -> Tuple[int, ...]: - """ - Atom rings sizes. 
- """ - try: - return self._graph()._rings_sizes[self._n] - except AttributeError: - raise IsNotConnectedAtom - except KeyError: - return () - - @property - def implicit_hydrogens(self) -> Tuple[int, ...]: - try: - return self._graph()._hydrogens[self._n] - except AttributeError: - raise IsNotConnectedAtom - - -class QueryElement(Query, ABC): - __slots__ = () - - @property - def atomic_symbol(self) -> str: - return self.__class__.__name__[5:] - - @classmethod - def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: - """ - get Element class by its symbol - """ - if symbol == 'A': - return AnyElement - elif symbol == 'M': - return AnyMetal - try: - element = next(x for x in QueryElement.__subclasses__() if x.__name__ == f'Query{symbol}') - except StopIteration: - raise ValueError(f'QueryElement with symbol "{symbol}" not found') - return element - - @classmethod - def from_atomic_number(cls, number: int) -> Type['QueryElement']: - """ - get Element class by its number - """ - try: - element = next(x for x in QueryElement.__subclasses__() if x.atomic_number.fget(None) == number) - except StopIteration: - raise ValueError(f'QueryElement with number "{number}" not found') - return element - - @classmethod - def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': - """ - get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement - """ - if isinstance(atom, Element): - return cls.from_atomic_number(atom.atomic_number)(atom.isotope) - elif not isinstance(atom, Query): - raise TypeError('Element or Query expected') - return atom.copy() - - def __eq__(self, other): - """ - compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if self.atomic_number == other.atomic_number and self.charge == other.charge and \ - self.is_radical == other.is_radical: - if self.isotope and self.isotope != other.isotope: - return False - if self.neighbors and 
other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, QueryElement) and self.atomic_number == other.atomic_number and \ - self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - # equal query element has equal query marks - return True - return False - - def __hash__(self): - return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, - self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - - -class AnyElement(Query): - __slots__ = () - - def __init__(self, *args, **kwargs): - super().__init__() - - @property - def atomic_symbol(self) -> str: - return 'A' - - @property - def atomic_number(self) -> int: - return 0 - - def __eq__(self, other): - """ - Compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if self.charge == other.charge and self.is_radical == other.is_radical: - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in 
ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, AnyMetal): - return False - elif isinstance(other, Query) and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - return True - return False - - def __hash__(self): - return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, self.ring_sizes, - self.implicit_hydrogens, self.heteroatoms)) - - -class AnyMetal(Query): - """ - Charge and radical ignored any metal. Rings, hydrogens and heteroatoms count also ignored. - - Class designed for d-elements matching in standardization. 
- """ - def __init__(self, *args, **kwargs): - super().__init__() - - @property - def atomic_symbol(self) -> str: - return 'M' - - @property - def atomic_number(self) -> int: - return 0 - - def __eq__(self, other): - if isinstance(other, Element): - if other.atomic_symbol not in _inorganic: - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - return True - elif isinstance(other, AnyMetal) and self.neighbors == other.neighbors \ - and self.hybridization == other.hybridization: - return True - return False - - def __hash__(self): - return hash((self.neighbors, self.hybridization)) - - -class ListElement(Query): - __slots__ = ('_elements', '_numbers') - - def __init__(self, elements: List[str], *args, **kwargs): - """ - Elements list - """ - super().__init__() - self._elements = tuple(elements) - self._numbers = tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in elements) - - @property - def atomic_symbol(self) -> str: - return ','.join(self._elements) - - @property - def atomic_number(self) -> int: - return 0 - - def copy(self): - copy = super().copy() - copy._elements = self._elements - copy._numbers = self._numbers - return copy - - def __eq__(self, other): - """ - Compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if other.atomic_number in self._numbers: - if self.charge != other.charge or self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in 
self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, (AnyElement, AnyMetal)): - return False - elif isinstance(other, Query) and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - if isinstance(other, ListElement): - return self._numbers == other._numbers - return other.atomic_number in self._numbers - return False - - def __hash__(self): - return hash((self._numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, - self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - - def __getstate__(self): - state = super().__getstate__() - state['elements'] = self._elements - return state - - def __setstate__(self, state): - self._elements = state['elements'] - self._numbers = tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() - if x.__name__ in state['elements']) - super().__setstate__(state) - - def __repr__(self): - return f'{self.__class__.__name__}([{",".join(self._elements)}])' - - -__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index 9b06949d..a7c10f55 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,9 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupI -from .periods import * +from .base import Element +from .base.groups import GroupI +from .base.periods import * class H(Element, PeriodI, GroupI): diff --git a/chython/periodictable/groupII.py b/chython/periodictable/groupII.py index 0df4a674..bae2cf65 100644 --- a/chython/periodictable/groupII.py +++ b/chython/periodictable/groupII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class Be(Element, PeriodII, GroupII): diff --git a/chython/periodictable/groupIII.py b/chython/periodictable/groupIII.py index 60c57630..a2683f8d 100644 --- a/chython/periodictable/groupIII.py +++ b/chython/periodictable/groupIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupIII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Sc(Element, PeriodIV, GroupIII): diff --git a/chython/periodictable/groupIV.py b/chython/periodictable/groupIV.py index cc22146a..c80e1482 100644 --- a/chython/periodictable/groupIV.py +++ b/chython/periodictable/groupIV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupIV -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIV +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Ti(Element, PeriodIV, GroupIV): diff --git a/chython/periodictable/groupIX.py b/chython/periodictable/groupIX.py index 6cf22449..97608fd9 100644 --- a/chython/periodictable/groupIX.py +++ b/chython/periodictable/groupIX.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupIX -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIX +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Co(Element, PeriodIV, GroupIX): diff --git a/chython/periodictable/groupV.py b/chython/periodictable/groupV.py index e923cec1..66036c63 100644 --- a/chython/periodictable/groupV.py +++ b/chython/periodictable/groupV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupV -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupV +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class V(Element, PeriodIV, GroupV): diff --git a/chython/periodictable/groupVI.py b/chython/periodictable/groupVI.py index 6fa24b94..03b76191 100644 --- a/chython/periodictable/groupVI.py +++ b/chython/periodictable/groupVI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Dayana Bashirova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVI -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVI +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Cr(Element, PeriodIV, GroupVI): diff --git a/chython/periodictable/groupVII.py b/chython/periodictable/groupVII.py index c66e89d9..3fceee40 100644 --- a/chython/periodictable/groupVII.py +++ b/chython/periodictable/groupVII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupVII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Mn(Element, PeriodIV, GroupVII): diff --git a/chython/periodictable/groupVIII.py b/chython/periodictable/groupVIII.py index 3d88324b..ea510d60 100644 --- a/chython/periodictable/groupVIII.py +++ b/chython/periodictable/groupVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVIII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVIII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Fe(Element, PeriodIV, GroupVIII): diff --git a/chython/periodictable/groupX.py b/chython/periodictable/groupX.py index 80a499a4..0ca6aa05 100644 --- a/chython/periodictable/groupX.py +++ b/chython/periodictable/groupX.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupX -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupX +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Ni(Element, PeriodIV, GroupX): diff --git a/chython/periodictable/groupXI.py b/chython/periodictable/groupXI.py index 40bc7c91..96be94af 100644 --- a/chython/periodictable/groupXI.py +++ b/chython/periodictable/groupXI.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXI -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXI +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Cu(Element, PeriodIV, GroupXI): diff --git a/chython/periodictable/groupXII.py b/chython/periodictable/groupXII.py index 7b48dfad..17a3e8cf 100644 --- a/chython/periodictable/groupXII.py +++ b/chython/periodictable/groupXII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Dayana Bashirova # This file is part of chython. 
# @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Zn(Element, PeriodIV, GroupXII): diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index dd5d728c..c0d3f507 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXIII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXIII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class B(Element, PeriodII, GroupXIII): diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index ae2be925..0a18f705 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXIV -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXIV +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class C(Element, PeriodII, GroupXIV): diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 52f9b545..218aeecc 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXV -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXV +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class N(Element, PeriodII, GroupXV): diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index fd060971..4791eb2a 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -19,9 +19,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXVI -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXVI +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class O(Element, PeriodII, GroupXVI): diff --git a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index 064722c2..da6ce4c0 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXVII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXVII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class F(Element, PeriodII, GroupXVII): diff --git a/chython/periodictable/groupXVIII.py b/chython/periodictable/groupXVIII.py index 692fd9b4..849a893c 100644 --- a/chython/periodictable/groupXVIII.py +++ b/chython/periodictable/groupXVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXVIII -from .periods import * +from .base import Element +from .base.groups import GroupXVIII +from .base.periods import * class He(Element, PeriodI, GroupXVIII): From 688a27a285b0f1ef70b2cfaed8797c86c0cadbc5 Mon Sep 17 00:00:00 2001 From: reginaib Date: Thu, 31 Oct 2024 16:29:13 +0100 Subject: [PATCH 02/51] saved --- chython/containers/graph.py | 89 +++--------- chython/containers/molecule.py | 194 ++++++++++--------------- chython/containers/query.py | 134 +---------------- chython/periodictable/base/__init__.py | 2 +- chython/periodictable/base/element.py | 73 ++++++---- chython/periodictable/base/query.py | 27 ++-- 6 files changed, 160 insertions(+), 359 deletions(-) diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 4d9ad441..17f7a175 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -29,25 +29,16 @@ class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): - __slots__ = ('_atoms', '_bonds', '_charges', '_radicals', '_atoms_stereo', '_cis_trans_stereo', '_allenes_stereo', - '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '_cis_trans_stereo', '__dict__', '__weakref__') __class_cache__ = {} _atoms: Dict[int, Atom] _bonds: Dict[int, Dict[int, Bond]] - _charges: Dict[int, int] - _radicals: Dict[int, bool] - _atoms_stereo: Dict[int, bool] - _allenes_stereo: Dict[int, bool] _cis_trans_stereo: Dict[Tuple[int, int], bool] def __init__(self): self._atoms = {} self._bonds = {} - self._charges = {} - self._radicals = {} - self._atoms_stereo = {} - self._allenes_stereo = {} self._cis_trans_stereo = {} def atom(self, n: int) -> Atom: @@ -99,7 +90,7 @@ def bonds_count(self) -> int: return sum(len(x) for x in self._bonds.values()) // 2 @abstractmethod - def add_atom(self, atom: Atom, n: Optional[int] = None, *, charge: int = 0, is_radical: bool = False) -> int: + def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: """ new atom addition """ @@ -109,19 +100,10 @@ def add_atom(self, atom: Atom, n: Optional[int] = None, *, charge: int = 0, is_r raise TypeError('mapping should be integer') elif n in self._atoms: raise MappingError('atom with same number exists') - elif not isinstance(is_radical, bool): - raise TypeError('bool expected') - elif not isinstance(charge, int): - raise TypeError('formal charge should be int in range [-4, 4]') - elif charge > 4 or charge < -4: - raise ValueError('formal charge should be in range [-4, 4]') - - atom._attach_graph(self, n) + self._atoms[n] = atom - self._charges[n] = charge - self._radicals[n] = is_radical self._bonds[n] = {} - self.__dict__.clear() + self.flush_cache() return n @abstractmethod @@ -137,7 +119,7 @@ def add_bond(self, n: int, m: int, bond: Bond): raise MappingError('atoms already bonded') self._bonds[n][m] = 
self._bonds[m][n] = bond - self.__dict__.clear() + self.flush_cache() @abstractmethod def copy(self): @@ -145,14 +127,16 @@ def copy(self): copy of graph """ copy = object.__new__(self.__class__) - copy._charges = self._charges.copy() - copy._radicals = self._radicals.copy() - - copy._atoms = ca = {} - for n, atom in self._atoms.items(): - atom = atom.copy() - ca[n] = atom - atom._attach_graph(copy, n) + copy._atoms = {n: atom.copy(full=True) for n, atom in self._atoms.items()} + + copy._bonds = cb = {} + for n, m_bond in self._bonds.items(): + cb[n] = cbn = {} + for m, bond in m_bond.items(): + if m in cb: # bond partially exists. need back-connection. + cbn[m] = cb[m][n] + else: + cbn[m] = bond.copy() return copy @abstractmethod @@ -168,56 +152,19 @@ def remap(self, mapping: Dict[int, int], *, copy=False): raise ValueError('mapping overlap') mg = mapping.get - sc = self._charges - sr = self._radicals - if copy: h = self.__class__() - ha = h._atoms - hc = h._charges - hr = h._radicals - has = h._atoms_stereo - hal = h._allenes_stereo + h._atoms = {mg(n, n): atom.copy(full=True) for n, atom in self._atoms.items()} hcs = h._cis_trans_stereo - - for n, atom in self._atoms.items(): - m = mg(n, n) - atom = atom.copy() - ha[m] = atom - atom._attach_graph(h, m) else: - ha = {} - hc = {} - hr = {} - has = {} - hal = {} + self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} hcs = {} - for n, atom in self._atoms.items(): - m = mg(n, n) - ha[m] = atom - atom._change_map(m) # change mapping number - - for n in self._atoms: - m = mg(n, n) - hc[m] = sc[n] - hr[m] = sr[n] - - for n, stereo in self._atoms_stereo.items(): - has[mg(n, n)] = stereo - for n, stereo in self._allenes_stereo.items(): - hal[mg(n, n)] = stereo for (n, m), stereo in self._cis_trans_stereo.items(): hcs[(mg(n, n), mg(m, m))] = stereo if copy: return h # noqa - - self._atoms = ha - self._charges = hc - self._radicals = hr - self._atoms_stereo = has - self._allenes_stereo = hal 
self._cis_trans_stereo = hcs self.flush_cache() return self diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 56d6987b..2c67fed2 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,7 +19,6 @@ from CachedMethods import cached_args_method from collections import Counter, defaultdict from functools import cached_property -from numpy import uint, zeros from typing import Dict, Iterable, List, Optional, Tuple, Union from weakref import ref from zlib import compress, decompress @@ -45,37 +44,29 @@ class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, X3domMolecule): - __slots__ = ('_plane', '_conformers', '_hydrogens', '_parsed_mapping', '_backup', '__meta', '__name') - - _conformers: List[Dict[int, Tuple[float, float, float]]] - _hydrogens: Dict[int, Optional[int]] - _parsed_mapping: Dict[int, int] - _plane: Dict[int, Tuple[float, float]] + __slots__ = ('_backup', '_meta', '_name', '_changed') def __init__(self): super().__init__() - self._conformers = [] - self._hydrogens = {} - self._parsed_mapping = {} - self._plane = {} - self.__meta = None - self.__name = None + self._meta = None + self._name = None + self._changed = None @property def meta(self) -> Dict: - if self.__meta is None: - self.__meta = {} # lazy - return self.__meta + if self._meta is None: + self._meta = {} # lazy + return self._meta @property def name(self) -> str: - return self.__name or '' + return self._name or '' @name.setter def name(self, name): if not isinstance(name, str): - raise TypeError('name should be string up to 80 symbols') - self.__name = name + raise 
TypeError('name should be a string preferably up to 80 symbols') + self._name = name def environment(self, atom: int, include_bond: bool = True, include_atom: bool = True) -> \ Tuple[Union[Tuple[int, Bond, Element], @@ -101,10 +92,9 @@ def environment(self, atom: int, include_bond: bool = True, include_atom: bool = return tuple(self._bonds[atom].items()) return tuple(self._bonds[atom]) - @cached_args_method def neighbors(self, n: int) -> int: """number of neighbors atoms excluding any-bonded""" - return sum(b.order != 8 for b in self._bonds[n].values()) + return self._atoms[n].neighbors @cached_args_method def hybridization(self, n: int) -> int: @@ -135,8 +125,7 @@ def heteroatoms(self, n: int) -> int: """ Number of neighbored heteroatoms (not carbon or hydrogen) except any-bond connected. """ - atoms = self._atoms - return sum(atoms[m].atomic_number not in (1, 6) for m, b in self._bonds[n].items() if b.order != 8) + return self._atoms[n].heteroatoms def implicit_hydrogens(self, n: int) -> Optional[int]: """ @@ -144,26 +133,23 @@ def implicit_hydrogens(self, n: int) -> Optional[int]: Returns None if count are ambiguous. """ - return self._hydrogens[n] + return self._atoms[n].implicit_hydrogens - @cached_args_method def explicit_hydrogens(self, n: int) -> int: """ Number of explicit hydrogen atoms connected to atom. Take into account any type of bonds with hydrogen atoms. """ - atoms = self._atoms - return sum(atoms[m].atomic_number == 1 for m in self._bonds[n]) + return self._atoms[n].explicit_hydrogens - @cached_args_method def total_hydrogens(self, n: int) -> int: """ Number of hydrogen atoms connected to atom. Take into account any type of bonds with hydrogen atoms. """ - return self._hydrogens[n] + self.explicit_hydrogens(n) + return self._atoms[n].total_hydrogens @cached_args_method def adjacency_matrix(self, set_bonds=False, /): @@ -172,6 +158,8 @@ def adjacency_matrix(self, set_bonds=False, /): :param set_bonds: if True set bond orders instead of 1. 
""" + from numpy import uint, zeros + adj = zeros((len(self), len(self)), dtype=uint) mapping = {n: x for x, n in enumerate(self._atoms)} if set_bonds: @@ -191,24 +179,25 @@ def molecular_charge(self) -> int: """ Total charge of molecule """ - return sum(self._charges.values()) + return sum(a.charge for a in self._atoms.values()) @cached_property def is_radical(self) -> bool: """ True if at least one atom is radical """ - return any(self._radicals.values()) + return any(a.is_radical for a in self._atoms.values()) @cached_property def molecular_mass(self) -> float: - return sum(x.atomic_mass for x in self._atoms.values()) + sum(self._hydrogens.values()) * H().atomic_mass + h = H().atomic_mass + return sum(a.atomic_mass + a.implicit_hydrogens * h for a in self._atoms.values()) @cached_property def brutto(self) -> Dict[str, int]: """Counted atoms dict""" - c = Counter(x.atomic_symbol for x in self._atoms.values()) - c['H'] += sum(self._hydrogens.values()) + c = Counter(a.atomic_symbol for a in self._atoms.values()) + c['H'] += sum(a.implicit_hydrogens for a in self._atoms.values()) return dict(c) @cached_property @@ -220,8 +209,7 @@ def aromatic_rings(self) -> Tuple[Tuple[int, ...], ...]: return tuple(ring for ring in self.sssr if bonds[ring[0]][ring[-1]] == 4 and all(bonds[n][m] == 4 for n, m in zip(ring, ring[1:]))) - def add_atom(self, atom: Union[Element, int, str], *args, charge=0, is_radical=False, - xy: Tuple[float, float] = (0., 0.), _skip_hydrogen_calculation=False, **kwargs): + def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=False, **kwargs): """ Add new atom. 
""" @@ -232,27 +220,17 @@ def add_atom(self, atom: Union[Element, int, str], *args, charge=0, is_radical=F atom = Element.from_atomic_number(atom)() else: raise TypeError('Element object expected') - if not isinstance(xy, tuple) or len(xy) != 2 or not isinstance(xy[0], float) or not isinstance(xy[1], float): - raise TypeError('XY should be tuple with 2 float') - - n = super().add_atom(atom, *args, charge=charge, is_radical=is_radical, **kwargs) - self._plane[n] = xy - self._conformers.clear() # clean conformers. need full recalculation for new system - if _skip_hydrogen_calculation: - self._hydrogens[n] = None - elif atom.atomic_number != 1: - try: - rules = atom.valence_rules(charge, is_radical, 0) - except ValenceError: - self._hydrogens[n] = None - else: - self._hydrogens[n] = rules[0][2] # first rule without neighbors + n = super().add_atom(atom, *args, **kwargs) + if self._changed is None: + self._changed = [n] else: - self._hydrogens[n] = 0 + self._changed.append(n) + if not _skip_calculation: + self.fix_labels() return n - def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_hydrogen_calculation=False): + def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): """ Connect atoms with bonds. @@ -263,21 +241,18 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_hydrogen_calculation=F if not isinstance(bond, Bond): bond = Bond(bond) - bond._attach_graph(self, n, m) super().add_bond(n, m, bond) - self._conformers.clear() # clean conformers. 
need full recalculation for new system - - if _skip_hydrogen_calculation: # skip stereo fixing too - return - - self._calc_implicit(n) - self._calc_implicit(m) - - if self._atoms[n].atomic_number != 1 and self._atoms[m].atomic_number != 1: # not hydrogen - # fix stereo if formed not to hydrogen bond - self.fix_stereo() + if bond.order == 8: + return # any bond doesn't change anything + if self._changed is None: + self._changed = [n, n] + else: + self._changed.append(n) + self._changed.append(m) + if not _skip_calculation: + self.fix_labels() - def delete_atom(self, n: int, *, _skip_hydrogen_calculation=False): + def delete_atom(self, n: int, *, _skip_calculation=False): """ Remove atom. @@ -285,25 +260,25 @@ def delete_atom(self, n: int, *, _skip_hydrogen_calculation=False): Implicit hydrogens marks will not be set if atoms in aromatic rings. Call `kekule()` and `thiele()` in sequence to fix marks. """ + atoms = self._atoms ngb = self._bonds.pop(n) - fix = self._atoms.pop(n).atomic_number != 1 and ngb and not _skip_hydrogen_calculation - - del self._charges[n] - del self._radicals[n] - del self._hydrogens[n] - del self._plane[n] + atom_n = atoms.pop(n) - for m in ngb: + for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] - if not _skip_hydrogen_calculation: + if bond.order == 8: + continue + if self._changed is None: + self._changed = [m] + else: + self._changed.append(m) + atom_m = atoms[m] + atom_m._neighbors -= 1 + if atom_n.atomic_number not in (1, 6): + atom_m._heteroatoms -= 1 + if not _skip_calculation: self._calc_implicit(m) - self._conformers.clear() # clean conformers. 
need full recalculation for new system - try: - del self._parsed_mapping[n] - except KeyError: - pass - if fix: # hydrogen atom not used for stereo coding self.fix_stereo() self.flush_cache() @@ -396,28 +371,13 @@ def remap(self, mapping: Dict[int, int], *, copy: bool = False) -> 'MoleculeCont def copy(self) -> 'MoleculeContainer': copy = super().copy() - - copy._bonds = cb = {} - for n, m_bond in self._bonds.items(): - cb[n] = cbn = {} - for m, bond in m_bond.items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] - else: - cbn[m] = bond = bond.copy() - bond._attach_graph(copy, n, m) - - copy._MoleculeContainer__name = self.__name - if self.__meta is None: - copy._MoleculeContainer__meta = None + copy._name = self._name + if self._meta is None: + copy._meta = None else: - copy._MoleculeContainer__meta = self.__meta.copy() - copy._plane = self._plane.copy() - copy._hydrogens = self._hydrogens.copy() + copy._meta = self._meta.copy() copy._parsed_mapping = self._parsed_mapping.copy() copy._conformers = [c.copy() for c in self._conformers] - copy._atoms_stereo = self._atoms_stereo.copy() - copy._allenes_stereo = self._allenes_stereo.copy() copy._cis_trans_stereo = self._cis_trans_stereo.copy() return copy @@ -951,7 +911,7 @@ def _cpack(self, order=None, check=True): def _augmented_substructure(self, atoms: Iterable[int], deep: int): atoms = set(atoms) bonds = self._bonds - if atoms - self._atoms.keys(): + if atoms - bonds.keys(): raise ValueError('invalid atom numbers') nodes = [atoms] for _ in range(deep): @@ -967,22 +927,20 @@ def _calc_implicit(self, n: int): """ atoms = self._atoms atom = atoms[n] - if (an := atom.atomic_number) == 1: # hydrogen nether has implicit H - self._hydrogens[n] = 0 + if atom.atomic_number == 1: # hydrogen nether has implicit H + atom._implicit_hydrogens = 0 return - charge: int = self._charges[n] - is_radical = self._radicals[n] explicit_sum = 0 explicit_dict = defaultdict(int) aroma = 0 for m, bond in 
self._bonds[n].items(): order = bond.order if order == 4: # only neutral carbon aromatic rings supported - if not charge and not is_radical and an == 6: + if not atom.charge and not atom.is_radical and atom.atomic_number == 6: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count - self._hydrogens[n] = None + atom._implicit_hydrogens = None return elif order != 8: # any bond used for complexes explicit_sum += order @@ -990,32 +948,32 @@ def _calc_implicit(self, n: int): if aroma == 2: if explicit_sum == 0: # H-Ar - self._hydrogens[n] = 1 + atom._implicit_hydrogens = 1 elif explicit_sum == 1: # R-Ar - self._hydrogens[n] = 0 + atom._implicit_hydrogens = 0 else: # invalid aromaticity - self._hydrogens[n] = None + atom._implicit_hydrogens = None return elif aroma == 3: # condensed rings if explicit_sum: # invalid aromaticity - self._hydrogens[n] = None + atom._implicit_hydrogens = None else: - self._hydrogens[n] = 0 + atom._implicit_hydrogens = 0 return elif aroma: - self._hydrogens[n] = None + atom._implicit_hydrogens = None return try: - rules = atom.valence_rules(charge, is_radical, explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: - self._hydrogens[n] = None + atom._implicit_hydrogens = None return for s, d, h in rules: if s.issubset(explicit_dict) and all(explicit_dict[k] >= c for k, c in d.items()): - self._hydrogens[n] = h + atom._implicit_hydrogens = h return - self._hydrogens[n] = None # rule not found + atom._implicit_hydrogens = None # rule not found def _check_implicit(self, n: int, h: int) -> bool: atoms = self._atoms @@ -1035,7 +993,7 @@ def _check_implicit(self, n: int, h: int) -> bool: explicit_dict[(order, atoms[m].atomic_number)] += 1 try: - rules = atom.valence_rules(self._charges[n], self._radicals[n], explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: return False for s, d, _h in rules: diff --git a/chython/containers/query.py b/chython/containers/query.py index 
abe4dcaf..5024e915 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -24,157 +24,35 @@ from ..algorithms.smiles import QuerySmiles from ..algorithms.stereo import Stereo from ..periodictable import Element, ListElement, QueryElement -from ..periodictable.element import Query - - -def _validate_neighbors(neighbors): - if neighbors is None: - neighbors = () - elif isinstance(neighbors, int): - if neighbors < 0 or neighbors > 14: - raise ValueError('neighbors should be in range [0, 14]') - neighbors = (neighbors,) - elif isinstance(neighbors, (tuple, list)): - if not all(isinstance(n, int) for n in neighbors): - raise TypeError('neighbors should be list or tuple of ints') - if any(n < 0 or n > 14 for n in neighbors): - raise ValueError('neighbors should be in range [0, 14]') - if len(set(neighbors)) != len(neighbors): - raise ValueError('neighbors should be unique') - neighbors = tuple(sorted(neighbors)) - else: - raise TypeError('neighbors should be int or list or tuple of ints') - return neighbors +from ..periodictable.base import Query class QueryContainer(Stereo, Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): - __slots__ = ('_neighbors', '_hybridizations', '_hydrogens', '_rings_sizes', '_heteroatoms', '_masked') - - _neighbors: Dict[int, Tuple[int, ...]] - _hybridizations: Dict[int, Tuple[int, ...]] - _hydrogens: Dict[int, Tuple[int, ...]] - _rings_sizes: Dict[int, Tuple[int, ...]] - _heteroatoms: Dict[int, Tuple[int, ...]] - _masked: Dict[int, bool] - - def __init__(self): - super().__init__() - self._neighbors = {} - self._hybridizations = {} - self._hydrogens = {} - self._rings_sizes = {} - self._heteroatoms = {} - self._masked = {} - - def add_atom(self, atom: Union[Query, Element, int, str], *args, - neighbors: Union[int, List[int], Tuple[int, ...], None] = None, - hybridization: Union[int, List[int], Tuple[int, ...], None] = None, - hydrogens: Union[int, List[int], Tuple[int, ...], None] = None, - rings_sizes: Union[int, 
List[int], Tuple[int, ...], None] = None, - heteroatoms: Union[int, List[int], Tuple[int, ...], None] = None, - masked: bool = False, **kwargs): - if hybridization is None: - hybridization = () - elif isinstance(hybridization, int): - if hybridization < 1 or hybridization > 4: - raise ValueError('hybridization should be in range [1, 4]') - hybridization = (hybridization,) - elif isinstance(hybridization, (tuple, list)): - if not all(isinstance(h, int) for h in hybridization): - raise TypeError('hybridizations should be list or tuple of ints') - if any(h < 1 or h > 4 for h in hybridization): - raise ValueError('hybridizations should be in range [1, 4]') - if len(set(hybridization)) != len(hybridization): - raise ValueError('hybridizations should be unique') - hybridization = tuple(sorted(hybridization)) - else: - raise TypeError('hybridization should be int or list or tuple of ints') - - if rings_sizes is None: - rings_sizes = () - elif isinstance(rings_sizes, int): - if rings_sizes < 3 and rings_sizes != 0: - raise ValueError('rings should be greater or equal 3. ring equal to zero is no ring atom mark') - rings_sizes = (rings_sizes,) - elif isinstance(rings_sizes, (tuple, list)): - if not all(isinstance(n, int) for n in rings_sizes): - raise TypeError('rings should be list or tuple of ints') - if any(n < 3 for n in rings_sizes): - raise ValueError('rings should be greater or equal 3') - if len(set(rings_sizes)) != len(rings_sizes): - raise ValueError('rings should be unique') - rings_sizes = tuple(sorted(rings_sizes)) - else: - raise TypeError('rings should be int or list or tuple of ints') - - neighbors = _validate_neighbors(neighbors) - hydrogens = _validate_neighbors(hydrogens) - heteroatoms = _validate_neighbors(heteroatoms) + __slots__ = () + def add_atom(self, atom: Union[Query, Element, int, str], *args, **kwargs): if not isinstance(atom, Query): + # set only basic labels: charge, radical, isotope. use Query object directly for the full control. 
if isinstance(atom, Element): - atom = QueryElement.from_atomic_number(atom.atomic_number)(atom.isotope) + atom = QueryElement.from_atom(atom) elif isinstance(atom, str): atom = QueryElement.from_symbol(atom)() elif isinstance(atom, int): atom = QueryElement.from_atomic_number(atom)() else: raise TypeError('QueryElement object expected') - - n = super().add_atom(atom, *args, **kwargs) - self._neighbors[n] = neighbors - self._hybridizations[n] = hybridization - self._hydrogens[n] = hydrogens - self._rings_sizes[n] = rings_sizes - self._heteroatoms[n] = heteroatoms - self._masked[n] = masked - return n + return super().add_atom(atom, *args, **kwargs) def add_bond(self, n, m, bond: Union[QueryBond, Bond, int, Tuple[int, ...]]): if isinstance(bond, Bond): bond = QueryBond.from_bond(bond) elif not isinstance(bond, QueryBond): bond = QueryBond(bond) - - sct = self._stereo_cis_trans_paths # save - sa = self._stereo_allenes_paths - super().add_bond(n, m, bond) - # remove stereo marks on bonded atoms and all its bonds - if n in self._atoms_stereo: - del self._atoms_stereo[n] - if m in self._atoms_stereo: - del self._atoms_stereo[m] - if self._cis_trans_stereo: - for nm, path in sct.items(): - if (n in path or m in path) and nm in self._cis_trans_stereo: - del self._cis_trans_stereo[nm] - if self._allenes_stereo: - for c, path in sa.items(): - if (n in path or m in path) and c in self._allenes_stereo: - del self._allenes_stereo[c] def copy(self) -> 'QueryContainer': copy = super().copy() - - copy._bonds = cb = {} - for n, m_bond in self._bonds.items(): - cb[n] = cbn = {} - for m, bond in m_bond.items(): - if m in cb: # bond partially exists. need back-connection. 
- cbn[m] = cb[m][n] - else: - cbn[m] = bond.copy() - - copy._neighbors = self._neighbors.copy() - copy._hybridizations = self._hybridizations.copy() - copy._hydrogens = self._hydrogens.copy() - copy._heteroatoms = self._heteroatoms.copy() - copy._rings_sizes = self._rings_sizes.copy() - copy._atoms_stereo = self._atoms_stereo.copy() - copy._allenes_stereo = self._allenes_stereo.copy() copy._cis_trans_stereo = self._cis_trans_stereo.copy() - copy._masked = self._masked.copy() return copy def union(self, other: 'QueryContainer', *, remap: bool = False, copy: bool = True) -> 'QueryContainer': diff --git a/chython/periodictable/base/__init__.py b/chython/periodictable/base/__init__.py index f63b3bb6..f8ca87e8 100644 --- a/chython/periodictable/base/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -21,4 +21,4 @@ from .query import * -__all__ = ['Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index c3703336..d1c1edd0 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -24,7 +24,9 @@ class Element(ABC): - __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens') + __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', + '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_xyz', + '_neighbors', '_heteroatoms', '_hybridization') __class_cache__ = {} def __init__(self, isotope: Optional[int] = None): @@ -43,6 +45,11 @@ def __init__(self, isotope: Optional[int] = None): self._is_radical = False self._x = self._y = 0 self._implicit_hydrogens = None + self._explicit_hydrogens = 0 + self._neighbors = 0 + self._heteroatoms = 0 + self._hybridization = 1 + self._stereo = None def __repr__(self): if self._isotope: @@ -183,45 +190,33 @@ 
def xy(self, value: Tuple[float, float]): def implicit_hydrogens(self) -> Optional[int]: return self._implicit_hydrogens - def copy(self): - copy = object.__new__(self.__class__) - copy._isotope = self.isotope - copy._charge = self.charge - copy._is_radical = self.is_radical - return copy - - def __copy__(self): - return self.copy() - @property def explicit_hydrogens(self) -> int: - try: - return self._graph().explicit_hydrogens(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._explicit_hydrogens @property def total_hydrogens(self) -> int: - if self._implicit_hydrogens is None: + if self.implicit_hydrogens is None: raise ValenceError - return self._implicit_hydrogens + self.explicit_hydrogens + return self.implicit_hydrogens + self.explicit_hydrogens + + @property + def stereo(self): + """ + Tetrahedron or allene stereo label + """ + return self._stereo @property def heteroatoms(self) -> int: - try: - return self._graph().heteroatoms(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._heteroatoms @property def neighbors(self) -> int: """ Neighbors count of atom """ - try: - return self._graph().neighbors(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._neighbors @property def hybridization(self): @@ -230,10 +225,26 @@ def hybridization(self): of single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or two double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. 
""" - try: - return self._graph().hybridization(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._hybridization + + def copy(self, full=False): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + if full: + copy._x = self.x + copy._y = self.y + copy._implicit_hydrogens = self.implicit_hydrogens + copy._explicit_hydrogens = self.explicit_hydrogens + copy._stereo = self.stereo + copy._neighbors = self.neighbors + copy._heteroatoms = self.heteroatoms + copy._hybridization = self.hybridization + return copy + + def __copy__(self): + return self.copy() @property def ring_sizes(self) -> Tuple[int, ...]: @@ -302,13 +313,13 @@ def __eq__(self, other): def __hash__(self): return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.implicit_hydrogens or 0)) - def valence_rules(self, charge: int, is_radical: bool, valence: int) -> \ + def valence_rules(self, valence: int) -> \ List[Tuple[Set[Tuple[int, 'Element']], Dict[Tuple[int, 'Element'], int], int]]: """ valence rules for element with specific charge/radical state """ try: - return self._compiled_valence_rules[(charge, is_radical, valence)] + return self._compiled_valence_rules[(self.charge, self.is_radical, valence)] except KeyError: raise ValenceError diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 5ae7adb5..2cc55367 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -47,12 +47,13 @@ def _validate(value, prop): class Query(ABC): - __slots__ = ('_neighbors', '_hybridization', '_masked') + __slots__ = ('_neighbors', '_hybridization', '_masked', '_stereo') def __init__(self): self._neighbors = () self._hybridization = () self._masked = False + self._stereo = None @property def neighbors(self) -> Tuple[int, ...]: @@ -95,11 +96,17 @@ def masked(self, value): raise TypeError('masked should be bool') 
self._masked = value - def copy(self): + @property + def stereo(self): + return self._stereo + + def copy(self, full=False): copy = object.__new__(self.__class__) copy._neighbors = self.neighbors copy._hybridization = self.hybridization - copy._masked = self.masked + if full: + copy._masked = self.masked + copy._stereo = self.stereo return copy def __copy__(self): @@ -190,8 +197,8 @@ def ring_sizes(self, value): else: raise TypeError('rings should be int or list or tuple of ints') - def copy(self): - copy = super().copy() + def copy(self, full=False): + copy = super().copy(full=full) copy._charge = self.charge copy._is_radical = self.is_radical copy._heteroatoms = self.heteroatoms @@ -296,8 +303,8 @@ def atomic_symbol(self) -> str: def atomic_numbers(self): return tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in self._elements) - def copy(self): - copy = super().copy() + def copy(self, full=False): + copy = super().copy(full=full) copy._elements = self._elements return copy @@ -353,7 +360,7 @@ def __repr__(self): class QueryElement(ExtendedQuery, ABC): __slots__ = ('_isotope',) - def __init__(self, isotope: Optional[int]): + def __init__(self, isotope: Optional[int] = None): if isotope is not None and not isinstance(isotope, int): raise TypeError('isotope must be an int') super().__init__() @@ -420,8 +427,8 @@ def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': raise TypeError('Element or Query expected') return atom.copy() - def copy(self): - copy = super().copy() + def copy(self, full=False): + copy = super().copy(full=full) copy._isotope = self.isotope return copy From 9430396318951b9e56ac7cae0ed8acef7ad18a42 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 11:45:20 +0100 Subject: [PATCH 03/51] Refactor and clean up molecule and bond handling Refactored molecule.py, bonds.py, graph.py, and query.py for improved clarity and efficiency. 
Removed unused methods and redundant code, consolidated bond handling logic, and replaced lists with sets for tracking changes. --- chython/containers/bonds.py | 162 ++++++++++++----------- chython/containers/graph.py | 73 ++--------- chython/containers/molecule.py | 178 ++++---------------------- chython/containers/query.py | 157 +---------------------- chython/periodictable/base/element.py | 50 ++++---- chython/periodictable/base/query.py | 6 +- pyproject.toml | 2 +- 7 files changed, 145 insertions(+), 483 deletions(-) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index cb61af29..e6014c1e 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,94 +17,74 @@ # along with this program; if not, see . # from typing import Optional, Tuple, Union, List, Set -from weakref import ref -from ..exceptions import IsConnectedBond, IsNotConnectedBond class Bond: - __slots__ = ('__order', '__graph', '__n', '__m') + __slots__ = ('_order', '_in_ring', '_stereo') def __init__(self, order: int): if not isinstance(order, int): raise TypeError('invalid order value') elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') - self.__order = order + self._order = order + self._in_ring = False + self._stereo = None def __eq__(self, other): if isinstance(other, Bond): - return self.__order == other.order + return self.order == other.order elif isinstance(other, int): - return self.__order == other + return self.order == other return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order})' + return f'{self.__class__.__name__}({self.order})' def __int__(self): """ Bond order. """ - return self.__order + return self.order def __hash__(self): """ Bond order. 
Used in Morgan atoms ordering. """ - return self.__order - - def __getstate__(self): - return {'order': self.__order} - - def __setstate__(self, state): - self.__order = state['order'] + return self.order @property def order(self) -> int: - return self.__order + return self._order + + @property + def stereo(self) -> Optional[bool]: + return self._stereo @property def in_ring(self) -> bool: - try: - return self.__graph().is_ring_bond(self.__n, self.__m) - except AttributeError: - raise IsNotConnectedBond + return self._in_ring - def copy(self) -> 'Bond': + def copy(self, full=False) -> 'Bond': copy = object.__new__(self.__class__) - copy._Bond__order = self.__order + copy._order = self.order + if full: + copy._stereo = self.stereo + copy._in_ring = self.in_ring return copy + def __copy__(self): + return self.copy() + @classmethod def from_bond(cls, bond): - if isinstance(bond, cls): - copy = object.__new__(cls) - copy._Bond__order = bond.order - return copy + if isinstance(bond, Bond): + return cls(bond.order) raise TypeError('Bond expected') - def _attach_graph(self, graph, n, m): - try: - self.__graph - except AttributeError: - self.__graph = ref(graph) - self.__n = n - self.__m = m - else: - raise IsConnectedBond - - def _change_map(self, n, m): - try: - self.__graph - except AttributeError: - raise IsNotConnectedBond - else: - self.__n = n - self.__m = m - class DynamicBond: - __slots__ = ('__order', '__p_order') + __slots__ = ('_order', '_p_order') def __init__(self, order=None, p_order=None): if order is None: @@ -118,16 +98,16 @@ def __init__(self, order=None, p_order=None): if order not in (1, 4, 2, 3, None, 8) or p_order not in (1, 4, 2, 3, None, 8): raise ValueError('order or p_order should be from [1, 2, 3, 4, 8]') - self.__order = order - self.__p_order = p_order + self._order = order + self._p_order = p_order def __eq__(self, other): if isinstance(other, DynamicBond): - return self.__order == other.order and self.__p_order == other.p_order + return 
self.order == other.order and self.p_order == other.p_order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order}, {self.__p_order})' + return f'{self.__class__.__name__}({self.order}, {self.p_order})' def __int__(self): """ @@ -139,47 +119,51 @@ def __hash__(self): """ Hash of bond orders. """ - return hash((self.__order or 0, self.__p_order or 0)) + return hash((self.order or 0, self.p_order or 0)) @property def is_dynamic(self) -> bool: """ Bond has dynamic features """ - return self.__order != self.__p_order + return self.order != self.p_order @property def order(self) -> Optional[int]: - return self.__order + return self._order @property def p_order(self) -> Optional[int]: - return self.__p_order + return self._p_order def copy(self) -> 'DynamicBond': copy = object.__new__(self.__class__) - copy._DynamicBond__order = self.__order - copy._DynamicBond__p_order = self.__p_order + copy._order = self.order + copy._p_order = self.p_order return copy + def __copy__(self): + return self.copy() + @classmethod def from_bond(cls, bond): if isinstance(bond, Bond): copy = object.__new__(cls) - copy._DynamicBond__order = copy._DynamicBond__p_order = bond.order + copy._order = copy._p_order = bond.order return copy elif isinstance(bond, cls): copy = object.__new__(cls) - copy._DynamicBond__order = bond.order - copy._DynamicBond__p_order = bond.p_order + copy._order = bond.order + copy._p_order = bond.p_order return copy raise TypeError('DynamicBond expected') class QueryBond: - __slots__ = ('__order', '__in_ring') + __slots__ = ('_order', '_in_ring', '_stereo') - def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], in_ring: Optional[bool] = None): + def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], + in_ring: Optional[bool] = None, stereo: Optional[bool] = None): if isinstance(order, (list, tuple, set)): if not all(isinstance(x, int) for x in order): raise TypeError('invalid order value') @@ 
-194,63 +178,75 @@ def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], in_r raise TypeError('invalid order value') if in_ring is not None and not isinstance(in_ring, bool): raise TypeError('in_ring mark should be boolean or None') - self.__order = order - self.__in_ring = in_ring + if stereo is not None and not isinstance(stereo, bool): + raise TypeError('stereo mark should be boolean or None') + self._order = order + self._in_ring = in_ring + self._stereo = stereo def __eq__(self, other): if isinstance(other, Bond): - if self.__in_ring is not None: - if self.__in_ring != other.in_ring: + if self.in_ring is not None: + if self.in_ring != other.in_ring: return False - return other.order in self.__order + return other.order in self.order elif isinstance(other, QueryBond): - return self.__order == other.order and self.__in_ring == other.in_ring + return self.order == other.order and self.in_ring == other.in_ring elif isinstance(other, int): - return other in self.__order + return other in self.order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order}, {self.__in_ring})' + return f'{self.__class__.__name__}({self.order}, {self.in_ring})' def __int__(self): """ Simple bond order or hash of sorted tuple of orders. """ - if len(self.__order) == 1: - return self.__order[0] - return hash(self.__order) + if len(self.order) == 1: + return self.order[0] + return hash(self.order) def __hash__(self): """ Hash of orders and cycle mark. Used in Morgan atoms ordering. 
""" - return hash((self.__order, self.__in_ring)) + return hash((self.order, self.in_ring)) @property def order(self) -> Tuple[int, ...]: - return self.__order + return self._order @property def in_ring(self) -> Optional[bool]: - return self.__in_ring + return self._in_ring + + @property + def stereo(self): + return self._stereo - def copy(self) -> 'QueryBond': + def copy(self, full=False) -> 'QueryBond': copy = object.__new__(self.__class__) - copy._QueryBond__order = self.__order - copy._QueryBond__in_ring = self.__in_ring + copy._order = self.order + copy._in_ring = self.in_ring + if full: + copy._stereo = self.stereo return copy + def __copy__(self): + return self.copy() + @classmethod def from_bond(cls, bond): if isinstance(bond, Bond): copy = object.__new__(cls) - copy._QueryBond__order = (bond.order,) - copy._QueryBond__in_ring = None + copy._order = (bond.order,) + copy._in_ring = None return copy elif isinstance(bond, cls): copy = object.__new__(cls) - copy._QueryBond__order = bond.order - copy._QueryBond__in_ring = bond.in_ring + copy._order = bond.order + copy._in_ring = bond.in_ring return copy raise TypeError('QueryBond or Bond expected') diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 17f7a175..54470b35 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -29,17 +29,15 @@ class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): - __slots__ = ('_atoms', '_bonds', '_cis_trans_stereo', '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '__dict__') __class_cache__ = {} _atoms: Dict[int, Atom] _bonds: Dict[int, Dict[int, Bond]] - _cis_trans_stereo: Dict[Tuple[int, int], bool] def __init__(self): self._atoms = {} self._bonds = {} - self._cis_trans_stereo = {} def atom(self, n: int) -> Atom: return self._atoms[n] @@ -121,14 +119,12 @@ def add_bond(self, n: int, m: int, bond: Bond): self._bonds[n][m] = self._bonds[m][n] = bond self.flush_cache() - @abstractmethod def copy(self): """ copy of graph 
""" copy = object.__new__(self.__class__) copy._atoms = {n: atom.copy(full=True) for n, atom in self._atoms.items()} - copy._bonds = cb = {} for n, m_bond in self._bonds.items(): cb[n] = cbn = {} @@ -139,63 +135,39 @@ def copy(self): cbn[m] = bond.copy() return copy - @abstractmethod - def remap(self, mapping: Dict[int, int], *, copy=False): + def remap(self, mapping: Dict[int, int]): """ Change atom numbers :param mapping: mapping of old numbers to the new - :param copy: keep original graph """ if len(mapping) != len(set(mapping.values())) or \ not (self._atoms.keys() - mapping.keys()).isdisjoint(mapping.values()): raise ValueError('mapping overlap') mg = mapping.get - if copy: - h = self.__class__() - h._atoms = {mg(n, n): atom.copy(full=True) for n, atom in self._atoms.items()} - hcs = h._cis_trans_stereo - else: - self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} - hcs = {} - - for (n, m), stereo in self._cis_trans_stereo.items(): - hcs[(mg(n, n), mg(m, m))] = stereo - - if copy: - return h # noqa - self._cis_trans_stereo = hcs + self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} + self._bonds = {mg(n, n): {mg(m, m): bond for m, bond in m_bond.items()} for n, m_bond in self._bonds.items()} self.flush_cache() - return self - @abstractmethod def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): """ Merge Graphs into one. :param remap: if atoms has collisions then remap other graph atoms else raise exception. 
- :param copy: keep original structure and return new object + :param copy: keep original structure and return a new object """ if self._atoms.keys() & other._atoms.keys(): - if remap: - other = other.remap({n: i for i, n in enumerate(other, start=max(self._atoms) + 1)}, copy=True) - else: + if not remap: raise MappingError('mapping of graphs is not disjoint') - + other = other.copy() + other.remap({n: i for i, n in enumerate(other, start=max(self._atoms) + 1)}) + else: + other = other.copy() # make a copy u = self.copy() if copy else self - u._charges.update(other._charges) - u._radicals.update(other._radicals) - - ua = u._atoms - for n, atom in other._atoms.items(): - ua[n] = atom = atom.copy() - atom._attach_graph(u, n) - - u._atoms_stereo.update(other._atoms_stereo) - u._allenes_stereo.update(other._allenes_stereo) - u._cis_trans_stereo.update(other._cis_trans_stereo) - return u, other + u._atoms.update(other._atoms) + u._bonds.update(other._bonds) + return u def flush_cache(self): self.__dict__.clear() @@ -224,24 +196,5 @@ def __iter__(self) -> Iterator[int]: def __bool__(self): return bool(self._atoms) - def __getstate__(self): - state = {'atoms': self._atoms, 'bonds': self._bonds, 'charges': self._charges, - 'radicals': self._radicals} - from chython import pickle_cache - - if pickle_cache: - state['cache'] = {k: v for k, v in self.__dict__.items() if k != '__cached_method___hash__'} - return state - - def __setstate__(self, state): - self._atoms = state['atoms'] - for n, a in state['atoms'].items(): - a._attach_graph(self, n) - self._charges = state['charges'] - self._radicals = state['radicals'] - self._bonds = state['bonds'] - if 'cache' in state: - self.__dict__.update(state['cache']) - __all__ = ['Graph'] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 2c67fed2..a4b5c8ef 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -105,20 +105,7 @@ def hybridization(self, n: int) -> int: of 
single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or two and more double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. """ - hybridization = 1 - for bond in self._bonds[n].values(): - order = bond.order - if order == 4: - return 4 - elif order == 3: - if hybridization != 3: - hybridization = 3 - elif order == 2: - if hybridization == 1: - hybridization = 2 - elif hybridization == 2: - hybridization = 3 - return hybridization + return self._atoms[n].hybridization @cached_args_method def heteroatoms(self, n: int) -> int: @@ -223,9 +210,9 @@ def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=Fals n = super().add_atom(atom, *args, **kwargs) if self._changed is None: - self._changed = [n] + self._changed = {n} else: - self._changed.append(n) + self._changed.add(n) if not _skip_calculation: self.fix_labels() return n @@ -245,10 +232,10 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): if bond.order == 8: return # any bond doesn't change anything if self._changed is None: - self._changed = [n, n] + self._changed = {n, m} else: - self._changed.append(n) - self._changed.append(m) + self._changed.add(n) + self._changed.add(m) if not _skip_calculation: self.fix_labels() @@ -260,30 +247,19 @@ def delete_atom(self, n: int, *, _skip_calculation=False): Implicit hydrogens marks will not be set if atoms in aromatic rings. Call `kekule()` and `thiele()` in sequence to fix marks. 
""" - atoms = self._atoms - ngb = self._bonds.pop(n) - atom_n = atoms.pop(n) - + del self._atoms[n] for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] if bond.order == 8: continue if self._changed is None: - self._changed = [m] + self._changed = {m} else: - self._changed.append(m) - atom_m = atoms[m] - atom_m._neighbors -= 1 - if atom_n.atomic_number not in (1, 6): - atom_m._heteroatoms -= 1 - if not _skip_calculation: - self._calc_implicit(m) - - if fix: # hydrogen atom not used for stereo coding - self.fix_stereo() - self.flush_cache() - - def delete_bond(self, n: int, m: int, *, _skip_hydrogen_calculation=False): + self._changed.add(m) + if not _skip_calculation: + self.fix_labels() + + def delete_bond(self, n: int, m: int, *, _skip_calculation=False): """ Disconnect atoms. @@ -292,82 +268,14 @@ def delete_bond(self, n: int, m: int, *, _skip_hydrogen_calculation=False): Call `kekule()` and `thiele()` in sequence to fix marks. """ del self._bonds[n][m] - del self._bonds[m][n] - self._conformers.clear() # clean conformers. 
need full recalculation for new system - - if not _skip_hydrogen_calculation: - self._calc_implicit(n) - self._calc_implicit(m) - - if self._atoms[n].atomic_number != 1 and self._atoms[m].atomic_number != 1 and not _skip_hydrogen_calculation: - self.fix_stereo() - self.flush_cache() - - def remap(self, mapping: Dict[int, int], *, copy: bool = False) -> 'MoleculeContainer': - atoms = self._atoms # keep original atoms dict - h = super().remap(mapping, copy=copy) - - mg = mapping.get - sp = self._plane - shg = self._hydrogens - - if copy: - h._MoleculeContainer__name = self.__name - if self.__meta is not None: - h._MoleculeContainer__meta = self.__meta.copy() - hb = h._bonds - hp = h._plane - hhg = h._hydrogens - hcf = h._conformers - hm = h._parsed_mapping - - # deep copy of bonds - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond = bond.copy() - bond._attach_graph(h, n, m) - else: - hb = {} - hp = {} - hhg = {} - hcf = [] - hm = {} - - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. 
- hbn[m] = hb[m][n] - else: - hbn[m] = bond - bond._change_map(n, m) - - for n in atoms: - m = mg(n, n) - hp[m] = sp[n] - hhg[m] = shg[n] - - hcf.extend({mg(n, n): x for n, x in c.items()} for c in self._conformers) - for n, m in self._parsed_mapping.items(): - hm[mg(n, n)] = m - - if copy: - return h - - self._bonds = hb - self._plane = hp - self._hydrogens = hhg - self._conformers = hcf - self._parsed_mapping = hm - return self + if self._bonds[m].pop(n).order != 8: + if self._changed is None: + self._changed = {n, m} + else: + self._changed.add(n) + self._changed.add(m) + if not _skip_calculation: + self.fix_labels() def copy(self) -> 'MoleculeContainer': copy = super().copy() @@ -376,32 +284,12 @@ def copy(self) -> 'MoleculeContainer': copy._meta = None else: copy._meta = self._meta.copy() - copy._parsed_mapping = self._parsed_mapping.copy() - copy._conformers = [c.copy() for c in self._conformers] - copy._cis_trans_stereo = self._cis_trans_stereo.copy() return copy def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = True) -> 'MoleculeContainer': if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') - u, o = super().union(other, remap=remap, copy=copy) - - ub = u._bonds - for n, m_bond in o._bonds.items(): - ub[n] = ubn = {} - for m, bond in m_bond.items(): - if m in ub: # bond partially exists. need back-connection. 
- ubn[m] = ub[m][n] - else: - ubn[m] = bond = bond.copy() - bond._attach_graph(u, n, m) - - u._MoleculeContainer__name = u._MoleculeContainer__meta = None - u._conformers.clear() - u._plane.update(o._plane) - u._hydrogens.update(o._hydrogens) - u._parsed_mapping.update(o._parsed_mapping) - return u + return super().union(other, remap=remap, copy=copy) def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalculate_hydrogens=True, skip_neighbors_marks=False, skip_hybridizations_marks=False, skip_hydrogens_marks=False, @@ -1078,27 +966,5 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.flush_cache() del self._backup - def __getstate__(self): - return {'conformers': self._conformers, 'hydrogens': self._hydrogens, 'atoms_stereo': self._atoms_stereo, - 'allenes_stereo': self._allenes_stereo, 'cis_trans_stereo': self._cis_trans_stereo, - 'parsed_mapping': self._parsed_mapping, 'meta': self.__meta, 'name': self.__name, - 'plane': self._plane, **super().__getstate__()} - - def __setstate__(self, state): - super().__setstate__(state) - self._conformers = state['conformers'] - self._atoms_stereo = state['atoms_stereo'] - self._allenes_stereo = state['allenes_stereo'] - self._cis_trans_stereo = state['cis_trans_stereo'] - self._hydrogens = state['hydrogens'] - self._parsed_mapping = state['parsed_mapping'] - self._plane = state['plane'] - self.__meta = state['meta'] - self.__name = state['name'] - - # attach bonds to graph - for n, m, b in self.bonds(): - b._attach_graph(self, n, m) - __all__ = ['MoleculeContainer'] diff --git a/chython/containers/query.py b/chython/containers/query.py index 5024e915..7a218786 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,14 +16,13 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from itertools import chain, product -from typing import Dict, List, Tuple, Union +from typing import Tuple, Union from .bonds import Bond, QueryBond from .graph import Graph from ..algorithms.isomorphism import QueryIsomorphism from ..algorithms.smiles import QuerySmiles from ..algorithms.stereo import Stereo -from ..periodictable import Element, ListElement, QueryElement +from ..periodictable import Element, QueryElement from ..periodictable.base import Query @@ -50,158 +49,10 @@ def add_bond(self, n, m, bond: Union[QueryBond, Bond, int, Tuple[int, ...]]): bond = QueryBond(bond) super().add_bond(n, m, bond) - def copy(self) -> 'QueryContainer': - copy = super().copy() - copy._cis_trans_stereo = self._cis_trans_stereo.copy() - return copy - def union(self, other: 'QueryContainer', *, remap: bool = False, copy: bool = True) -> 'QueryContainer': if not isinstance(other, QueryContainer): raise TypeError('QueryContainer expected') - u, o = super().union(other, remap=remap, copy=copy) - - ub = u._bonds - for n, m_bond in o._bonds.items(): - ub[n] = ubn = {} - for m, bond in m_bond.items(): - if m in ub: # bond partially exists. need back-connection. 
- ubn[m] = ub[m][n] - else: - ubn[m] = bond.copy() - - u._neighbors.update(o._neighbors) - u._hybridizations.update(o._hybridizations) - u._hydrogens.update(o._hydrogens) - u._rings_sizes.update(o._rings_sizes) - u._heteroatoms.update(o._heteroatoms) - u._masked.update(o._masked) - return u - - def remap(self, mapping: Dict[int, int], *, copy=False) -> 'QueryContainer': - atoms = self._atoms # keep original atoms dict - h = super().remap(mapping, copy=copy) - - mg = mapping.get - hydrogens = self._hydrogens - neighbors = self._neighbors - hybridizations = self._hybridizations - heteroatoms = self._heteroatoms - rings_sizes = self._rings_sizes - masked = self._masked - - if copy: - hb = h._bonds - hhg = h._hydrogens - hn = h._neighbors - hh = h._hybridizations - hx = h._heteroatoms - hrs = h._rings_sizes - hm = h._masked - - # deep copy of bonds - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond.copy() - else: - hb = {} - hhg = {} - hn = {} - hh = {} - hx = {} - hrs = {} - hm = {} - - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond - - for n in atoms: - m = mg(n, n) - hhg[m] = hydrogens[n] - hn[m] = neighbors[n] - hh[m] = hybridizations[n] - hx[m] = heteroatoms[n] - hrs[m] = rings_sizes[n] - hm[m] = masked[n] - - if copy: - return h # noqa - - self._bonds = hb - self._hydrogens = hhg - self._neighbors = hn - self._hybridizations = hh - self._heteroatoms = hx - self._rings_sizes = hrs - self._masked = hm - return self - - def enumerate_queries(self, *, enumerate_marks: bool = False): - """ - Enumerate complex queries into multiple simple ones. For example `[N,O]-C` into `NC` and `OC`. 
- - :param enumerate_marks: enumerate multiple marks to separate queries - """ - atoms = [(n, a._numbers) for n, a in self._atoms.items() if isinstance(a, ListElement)] - bonds = [(n, m, b.order) for n, m, b in self.bonds() if len(b.order) > 1] - for combo in product(*(x for *_, x in chain(atoms, bonds))): - copy = self.copy() - for (n, _), a in zip(atoms, combo): - copy._atoms[n] = a = QueryElement.from_atomic_number(a)() - a._attach_graph(copy, n) - for (n, m, _), b in zip(bonds, combo[len(atoms):]): - copy._bonds[n][m]._QueryBond__order = (b,) # noqa - - if enumerate_marks: - c = 0 - slices = [] - data = [] - for attr in ('_neighbors', '_hybridizations', '_hydrogens', '_heteroatoms', '_rings_sizes'): - tmp = [(n, v) for n, v in getattr(self, attr).items() if len(v) > 1] - if tmp: - data.extend(tmp) - slices.append((attr, c, c + len(tmp))) - c += len(tmp) - - for combo2 in product(*(x for _, x in data)): - copy2 = copy.copy() - for attr, i, j in slices: - attr = getattr(copy2, attr) - for (n, _), v in zip(data[i: j], combo2[i: j]): - attr[n] = (v,) - yield copy2 - else: - yield copy - - def __getstate__(self): - return {'atoms_stereo': self._atoms_stereo, 'allenes_stereo': self._allenes_stereo, - 'cis_trans_stereo': self._cis_trans_stereo, 'neighbors': self._neighbors, - 'hybridizations': self._hybridizations, 'hydrogens': self._hydrogens, 'masked': self._masked, - 'rings_sizes': self._rings_sizes, 'heteroatoms': self._heteroatoms, **super().__getstate__()} - - def __setstate__(self, state): - super().__setstate__(state) - self._atoms_stereo = state['atoms_stereo'] - self._allenes_stereo = state['allenes_stereo'] - self._cis_trans_stereo = state['cis_trans_stereo'] - self._neighbors = state['neighbors'] - self._hybridizations = state['hybridizations'] - self._hydrogens = state['hydrogens'] - self._rings_sizes = state['rings_sizes'] - self._heteroatoms = state['heteroatoms'] - self._masked = state['masked'] + return super().union(other, remap=remap, copy=copy) 
__all__ = ['QueryContainer'] diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index d1c1edd0..d65e039d 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -20,13 +20,13 @@ from CachedMethods import class_cached_property from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Type -from ...exceptions import IsNotConnectedAtom, ValenceError +from ...exceptions import ValenceError class Element(ABC): __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_xyz', - '_neighbors', '_heteroatoms', '_hybridization') + '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring') __class_cache__ = {} def __init__(self, isotope: Optional[int] = None): @@ -50,10 +50,12 @@ def __init__(self, isotope: Optional[int] = None): self._heteroatoms = 0 self._hybridization = 1 self._stereo = None + self._ring_sizes = () + self._in_ring = False def __repr__(self): - if self._isotope: - return f'{self.__class__.__name__}({self._isotope})' + if self.isotope: + return f'{self.__class__.__name__}({self.isotope})' return f'{self.__class__.__name__}()' @property @@ -201,7 +203,7 @@ def total_hydrogens(self) -> int: return self.implicit_hydrogens + self.explicit_hydrogens @property - def stereo(self): + def stereo(self) -> Optional[bool]: """ Tetrahedron or allene stereo label """ @@ -227,6 +229,20 @@ def hybridization(self): """ return self._hybridization + @property + def ring_sizes(self) -> Tuple[int, ...]: + """ + Atom rings sizes. + """ + return self._ring_sizes + + @property + def in_ring(self) -> bool: + """ + Atom in any ring. 
+ """ + return self._in_ring + def copy(self, full=False): copy = object.__new__(self.__class__) copy._isotope = self.isotope @@ -241,33 +257,13 @@ def copy(self, full=False): copy._neighbors = self.neighbors copy._heteroatoms = self.heteroatoms copy._hybridization = self.hybridization + copy._ring_sizes = self.ring_sizes + copy._in_ring = self.in_ring return copy def __copy__(self): return self.copy() - @property - def ring_sizes(self) -> Tuple[int, ...]: - """ - Atom rings sizes. - """ - try: - return self._graph().atoms_rings_sizes[self._n] - except AttributeError: - raise IsNotConnectedAtom - except KeyError: - return () - - @property - def in_ring(self) -> bool: - """ - Atom in any ring. - """ - try: - return self._n in self._graph().ring_atoms - except AttributeError: - raise IsNotConnectedAtom - @classmethod def from_symbol(cls, symbol: str) -> Type['Element']: """ diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 2cc55367..4145acf5 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -354,7 +354,7 @@ def __hash__(self): self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) def __repr__(self): - return f'{self.__class__.__name__}([{",".join(self._elements)}])' + return f'{self.__class__.__name__}([{self.atomic_symbol}])' class QueryElement(ExtendedQuery, ABC): @@ -367,8 +367,8 @@ def __init__(self, isotope: Optional[int] = None): self._isotope = isotope def __repr__(self): - if self._isotope: - return f'{self.__class__.__name__}({self._isotope})' + if self.isotope: + return f'{self.__class__.__name__}({self.isotope})' return f'{self.__class__.__name__}()' @property diff --git a/pyproject.toml b/pyproject.toml index 02c177e2..bf8fd347 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'chython' -version = '1.81' +version = '2.0' description = 'Library for processing molecules and reactions in python way' authors = ['Ramil Nugmanov '] 
license = 'LGPLv3' From dcf8c8a5708f7fb8247f3a414ad352ea139125de Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 13:29:38 +0100 Subject: [PATCH 04/51] Refactor molecule structure handling and backup procedures. Simplify the molecule structure by removing redundant charge and radical attributes and streamline the backup procedure by utilizing the copy method. Improved bond copying with added stereo support, and refined element creation from atomic data. --- chython/containers/bonds.py | 56 ++++++++++---------- chython/containers/cgr.py | 55 ++------------------ chython/containers/graph.py | 2 +- chython/containers/molecule.py | 74 +++++---------------------- chython/periodictable/base/dynamic.py | 39 +++++++++++--- chython/periodictable/base/element.py | 38 +++++++++----- chython/periodictable/base/query.py | 29 +++++++---- 7 files changed, 120 insertions(+), 173 deletions(-) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index e6014c1e..88cedd85 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -71,17 +71,14 @@ def copy(self, full=False) -> 'Bond': if full: copy._stereo = self.stereo copy._in_ring = self.in_ring + else: + copy._in_ring = False + copy._stereo = None return copy def __copy__(self): return self.copy() - @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - return cls(bond.order) - raise TypeError('Bond expected') - class DynamicBond: __slots__ = ('_order', '_p_order') @@ -146,17 +143,12 @@ def __copy__(self): return self.copy() @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - copy = object.__new__(cls) - copy._order = copy._p_order = bond.order - return copy - elif isinstance(bond, cls): - copy = object.__new__(cls) - copy._order = bond.order - copy._p_order = bond.p_order - return copy - raise TypeError('DynamicBond expected') + def from_bond(cls, bond: 'Bond') -> 'DynamicBond': + if not isinstance(bond, Bond): + raise TypeError('Bond expected') 
+ copy = object.__new__(cls) + copy._order = copy._p_order = bond.order + return copy class QueryBond: @@ -222,33 +214,37 @@ def in_ring(self) -> Optional[bool]: return self._in_ring @property - def stereo(self): + def stereo(self) -> Optional[bool]: return self._stereo def copy(self, full=False) -> 'QueryBond': copy = object.__new__(self.__class__) copy._order = self.order - copy._in_ring = self.in_ring if full: + copy._in_ring = self.in_ring copy._stereo = self.stereo + else: + copy._in_ring = copy._stereo = None return copy def __copy__(self): return self.copy() @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - copy = object.__new__(cls) - copy._order = (bond.order,) - copy._in_ring = None - return copy - elif isinstance(bond, cls): - copy = object.__new__(cls) - copy._order = bond.order + def from_bond(cls, bond: 'Bond', stereo=False, in_ring=False) -> 'QueryBond': + if not isinstance(bond, Bond): + raise TypeError('Bond expected') + copy = object.__new__(cls) + copy._order = (bond.order,) + if in_ring: copy._in_ring = bond.in_ring - return copy - raise TypeError('QueryBond or Bond expected') + else: + copy._in_ring = None + if stereo: + copy._stereo = bond.stereo + else: + copy._stereo = None + return copy __all__ = ['Bond', 'DynamicBond', 'QueryBond'] diff --git a/chython/containers/cgr.py b/chython/containers/cgr.py index 24959c80..9bdc697d 100644 --- a/chython/containers/cgr.py +++ b/chython/containers/cgr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -28,21 +28,13 @@ class CGRContainer(CGRSmiles, Morgan, Rings, Isomorphism, FingerprintsCGR): - __slots__ = ('_atoms', '_bonds', '_charges', '_radicals', '_p_charges', '_p_radicals', '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '__dict__') _atoms: Dict[int, DynamicElement] _bonds: Dict[int, Dict[int, DynamicBond]] - _charges: Dict[int, int] - _radicals: Dict[int, bool] - _p_charges: Dict[int, int] - _p_radicals: Dict[int, bool] def __init__(self): self._atoms = {} self._bonds = {} - self._charges = {} - self._radicals = {} - self._p_charges = {} - self._p_radicals = {} def bonds(self) -> Iterator[Tuple[int, int, DynamicBond]]: """ @@ -59,19 +51,8 @@ def bonds(self) -> Iterator[Tuple[int, int, DynamicBond]]: def center_atoms(self) -> Tuple[int, ...]: """ Get list of atoms of reaction center (atoms with dynamic: bonds, charges, radicals). """ - radicals = self._radicals - p_charges = self._p_charges - p_radicals = self._p_radicals - - center = set() - for n, c in self._charges.items(): - if c != p_charges[n] or radicals[n] != p_radicals[n]: - center.add(n) - - for n, m_bond in self._bonds.items(): - if any(bond.order != bond.p_order for bond in m_bond.values()): - center.add(n) - + center = {n for n, a in self._atoms.items() if a.is_dynamic} + center.update(n for n, m_bond in self._bonds.items() if any(bond.is_dynamic for bond in m_bond.values())) return tuple(center) def substructure(self, atoms) -> 'CGRContainer': @@ -82,22 +63,10 @@ def substructure(self, atoms) -> 'CGRContainer': """ atoms = set(atoms) sa = self._atoms - sc = self._charges - sr = self._radicals sb = self._bonds - spc = self._p_charges - spr = self._p_radicals sub = object.__new__(self.__class__) - sub._charges = {n: sc[n] for n in atoms} - sub._radicals = {n: sr[n] for n in atoms} - sub._p_charges = {n: spc[n] for n in atoms} - sub._p_radicals = {n: spr[n] for n in atoms} - - sub._atoms = ca = {} - for n in 
atoms: - ca[n] = atom = sa[n].copy() - atom._attach_graph(sub, n) + sub._atoms = {n: sa[n].copy() for n in atoms} sub._bonds = cb = {} for n in atoms: @@ -136,19 +105,5 @@ def get_mapping(self, other: 'CGRContainer', /, *, automorphism_filter: bool = T def __iter__(self): return iter(self._atoms) - def __getstate__(self): - return {'atoms': self._atoms, 'bonds': self._bonds, 'charges': self._charges, 'radicals': self._radicals, - 'p_charges': self._p_charges, 'p_radicals': self._p_radicals} - - def __setstate__(self, state): - self._atoms = state['atoms'] - for n, a in state['atoms'].items(): - a._attach_graph(self, n) - self._charges = state['charges'] - self._radicals = state['radicals'] - self._bonds = state['bonds'] - self._p_charges = state['p_charges'] - self._p_radicals = state['p_radicals'] - __all__ = ['CGRContainer'] diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 54470b35..fe3dc720 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -132,7 +132,7 @@ def copy(self): if m in cb: # bond partially exists. need back-connection. 
cbn[m] = cb[m][n] else: - cbn[m] = bond.copy() + cbn[m] = bond.copy(full=True) return copy def remap(self, mapping: Dict[int, int]): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index a4b5c8ef..5ccf06fc 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -443,16 +443,12 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') sa = self._atoms - sc = self._charges - sr = self._radicals sb = self._bonds bonds = [] adj = defaultdict(lambda: defaultdict(lambda: [None, None])) oa = other._atoms - oc = other._charges - or_ = other._radicals ob = other._bonds common = sa.keys() & oa.keys() @@ -460,38 +456,27 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': h = CGRContainer() ha = h._atoms hb = h._bonds - hc = h._charges - hpc = h._p_charges - hr = h._radicals - hpr = h._p_radicals for n in sa.keys() - common: # cleavage atoms - hc[n] = hpc[n] = sc[n] - hr[n] = hpr[n] = sr[n] + ha[n] = DynamicElement.from_atom(sa[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(sa[n]) - a._attach_graph(h, n) - for m, bond in sb[n].items(): if m not in ha: if m in common: # bond to common atoms is broken bond bond = DynamicBond(bond.order, None) else: - bond = DynamicBond(bond.order, bond.order) + bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) for n in oa.keys() - common: # coupling atoms - hc[n] = hpc[n] = oc[n] - hr[n] = hpr[n] = or_[n] + ha[n] = DynamicElement.from_atom(oa[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(oa[n]) - a._attach_graph(h, n) for m, bond in ob[n].items(): if m not in ha: if m in common: # bond to common atoms is formed bond bond = DynamicBond(None, bond.order) else: - bond = DynamicBond(bond.order, bond.order) + bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) for n in common: an = adj[n] @@ -502,17 +487,8 @@ def compose(self, other: 
'MoleculeContainer') -> 'CGRContainer': if m in common: an[m][1] = bond.order for n in common: - san = sa[n] - if san.atomic_number != oa[n].atomic_number or san.isotope != oa[n].isotope: - raise MappingError(f'atoms with number {n} not equal') - - hc[n] = sc[n] - hpc[n] = oc[n] - hr[n] = sr[n] - hpr[n] = or_[n] + ha[n] = DynamicElement.from_atoms(sa[n], oa[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(san) - a._attach_graph(h, n) for m, (o1, o2) in adj[n].items(): if m not in ha: @@ -926,44 +902,20 @@ def __enter__(self): """ Transaction of changes. Keep current state for restoring on errors. """ - atoms = {} - for n, atom in self._atoms.items(): - atom = atom.copy() - atoms[n] = atom - atom._attach_graph(self, n) - - bonds = {} - for n, m_bond in self._bonds.items(): - bonds[n] = cbn = {} - for m, bond in m_bond.items(): - if m in bonds: # bond partially exists. need back-connection. - cbn[m] = bonds[m][n] - else: - cbn[m] = bond = bond.copy() - bond._attach_graph(self, n, m) - - self._backup = {'atoms': atoms, 'bonds': bonds, 'parsed_mapping': self._parsed_mapping.copy(), - 'plane': self._plane.copy(), 'charges': self._charges.copy(), 'radicals': self._radicals.copy(), - 'hydrogens': self._hydrogens.copy(), 'conformers': [x.copy() for x in self._conformers], - 'atoms_stereo': self._atoms_stereo.copy(), 'allenes_stereo': self._allenes_stereo.copy(), - 'cis_trans_stereo': self._cis_trans_stereo.copy()} + self._backup = self.copy() return self def __exit__(self, exc_type, exc_val, exc_tb): if exc_type: # restore state backup = self._backup - self._atoms = backup['atoms'] - self._bonds = backup['bonds'] - self._parsed_mapping = backup['parsed_mapping'] - self._plane = backup['plane'] - self._charges = backup['charges'] - self._radicals = backup['radicals'] - self._hydrogens = backup['hydrogens'] - self._conformers = backup['conformers'] - self._atoms_stereo = backup['atoms_stereo'] - self._allenes_stereo = backup['allenes_stereo'] - self._cis_trans_stereo = 
backup['cis_trans_stereo'] + self._atoms = backup._atoms + self._bonds = backup._bonds + self._meta = backup._meta + self._name = backup._name self.flush_cache() + else: # update internal state + self.fix_labels() + self.fix_stereo() del self._backup diff --git a/chython/periodictable/base/dynamic.py b/chython/periodictable/base/dynamic.py index d0989547..c7af1a7a 100644 --- a/chython/periodictable/base/dynamic.py +++ b/chython/periodictable/base/dynamic.py @@ -17,7 +17,7 @@ # along with this program; if not, see . # from abc import ABC, abstractmethod -from typing import Type, Union, Optional +from typing import Type, Optional from .element import Element @@ -26,6 +26,8 @@ class DynamicElement(ABC): def __init__(self, isotope: Optional[int]): self._isotope = isotope + self._charge = self._p_charge = 0 + self._is_radical = self._p_is_radical = False @property def isotope(self): @@ -65,15 +67,36 @@ def from_atomic_number(cls, number: int) -> Type['DynamicElement']: return element @classmethod - def from_atom(cls, atom: Union['Element', 'DynamicElement']) -> 'DynamicElement': + def from_atom(cls, atom: 'Element') -> 'DynamicElement': """ - get DynamicElement object from Element object or copy of DynamicElement object + get DynamicElement object from Element object """ - if isinstance(atom, Element): - return cls.from_atomic_number(atom.atomic_number)(atom.isotope) - elif not isinstance(atom, DynamicElement): - raise TypeError('Element or DynamicElement expected') - return atom.copy() + if not isinstance(atom, Element): + raise TypeError('Element expected') + dynamic = object.__new__(cls.from_atomic_number(atom.atomic_number)) + dynamic._isotope = atom.isotope + dynamic._charge = dynamic._p_charge = atom.charge + dynamic._is_radical = dynamic._p_is_radical = atom.is_radical + return dynamic + + @classmethod + def from_atoms(cls, atom1: 'Element', atom2: 'Element') -> 'DynamicElement': + """ + get DynamicElement object from pair of Element objects + """ + if not 
isinstance(atom1, Element) or not isinstance(atom2, Element): + raise TypeError('Element expected') + if atom1.atomic_number != atom2.atomic_number: + raise ValueError('elements should be of the same type') + if atom1.isotope != atom2.isotope: + raise ValueError('elements should be of the same isotope') + dynamic = object.__new__(cls.from_atomic_number(atom1.atomic_number)) + dynamic._isotope = atom1.isotope + dynamic._charge = atom1.charge + dynamic._p_charge = atom2.charge + dynamic._is_radical = atom1.is_radical + dynamic._p_is_radical = atom2.is_radical + return dynamic @property def charge(self) -> int: diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index d65e039d..943d1128 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -45,11 +45,12 @@ def __init__(self, isotope: Optional[int] = None): self._is_radical = False self._x = self._y = 0 self._implicit_hydrogens = None + self._stereo = None + self._explicit_hydrogens = 0 self._neighbors = 0 self._heteroatoms = 0 self._hybridization = 1 - self._stereo = None self._ring_sizes = () self._in_ring = False @@ -243,22 +244,40 @@ def in_ring(self) -> bool: """ return self._in_ring - def copy(self, full=False): + def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': + """ + Get a copy of the Element object with attribute copy control. 
+ """ copy = object.__new__(self.__class__) copy._isotope = self.isotope copy._charge = self.charge copy._is_radical = self.is_radical + copy._x = self.x + copy._y = self.y if full: - copy._x = self.x - copy._y = self.y copy._implicit_hydrogens = self.implicit_hydrogens - copy._explicit_hydrogens = self.explicit_hydrogens copy._stereo = self.stereo + copy._explicit_hydrogens = self.explicit_hydrogens copy._neighbors = self.neighbors copy._heteroatoms = self.heteroatoms copy._hybridization = self.hybridization copy._ring_sizes = self.ring_sizes copy._in_ring = self.in_ring + else: + copy._explicit_hydrogens = 0 + copy._neighbors = 0 + copy._heteroatoms = 0 + copy._hybridization = 1 + copy._ring_sizes = () + copy._in_ring = False + if hydrogens: + copy._implicit_hydrogens = self.implicit_hydrogens + else: + copy._implicit_hydrogens = None + if stereo: + copy._stereo = self.stereo + else: + copy._stereo = None return copy def __copy__(self): @@ -290,15 +309,6 @@ def from_atomic_number(cls, number: int) -> Type['Element']: except KeyError: raise ValueError(f'Element with number "{number}" not found') - @classmethod - def from_atom(cls, atom: 'Element') -> 'Element': - """ - get Element copy - """ - if not isinstance(atom, Element): - raise TypeError('Element expected') - return atom.copy() - def __eq__(self, other): """ compare attached to molecules elements diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 4145acf5..19b5e66b 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -413,19 +413,30 @@ def from_atomic_number(cls, number: int) -> Type['QueryElement']: return element @classmethod - def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': + def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, heteroatoms=False, + hydrogens=False, ring_sizes=False) -> 'QueryElement': """ get QueryElement or AnyElement object from Element object or copy of QueryElement 
or AnyElement """ - if isinstance(atom, Element): - # transfer true atomic props - query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) - query._charge = atom.charge - query._is_radical = atom.is_radical - return query - elif not isinstance(atom, Query): + if not isinstance(atom, Element): raise TypeError('Element or Query expected') - return atom.copy() + + # transfer true atomic props + query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) + query._charge = atom.charge + query._is_radical = atom.is_radical + + if neighbors: + query._neighbors == (atom.neighbors,) + if hybridization: + query._hybridization == (atom.hybridization,) + if heteroatoms: + query._heteroatoms = (atom.heteroatoms,) + if ring_sizes: + query._ring_sizes = atom.ring_sizes + if hydrogens and atom.implicit_hydrogens is not None: + query._implicit_hydrogens = (atom.implicit_hydrogens,) + return query def copy(self, full=False): copy = super().copy(full=full) From e0fb2c5b91f01da54f0b76e8f523c9675e1ff648 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 14:54:16 +0100 Subject: [PATCH 05/51] Enhance molecule container: retain stereo info and fix labels. Updated MoleculeContainer to retain stereo information during atom/bond operations by introducing conditions in the fix_labels method. Expanded substructure method allowing customizable mark settings and improved copy methods in Bond and QueryElement to optionally retain stereo data. 
--- chython/containers/bonds.py | 7 +- chython/containers/molecule.py | 178 +++++++++++----------------- chython/periodictable/base/query.py | 11 +- 3 files changed, 79 insertions(+), 117 deletions(-) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 88cedd85..79f13cad 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -65,7 +65,7 @@ def stereo(self) -> Optional[bool]: def in_ring(self) -> bool: return self._in_ring - def copy(self, full=False) -> 'Bond': + def copy(self, full=False, stereo=False) -> 'Bond': copy = object.__new__(self.__class__) copy._order = self.order if full: @@ -73,7 +73,10 @@ def copy(self, full=False) -> 'Bond': copy._in_ring = self.in_ring else: copy._in_ring = False - copy._stereo = None + if stereo: + copy._stereo = self.stereo + else: + copy._stereo = None return copy def __copy__(self): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 5ccf06fc..40205489 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -37,20 +37,21 @@ from ..algorithms.stereo import MoleculeStereo from ..algorithms.tautomers import Tautomers from ..algorithms.x3dom import X3domMolecule -from ..exceptions import MappingError, ValenceError +from ..exceptions import ValenceError from ..periodictable import DynamicElement, Element, QueryElement, H class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, X3domMolecule): - __slots__ = ('_backup', '_meta', '_name', '_changed') + __slots__ = ('_meta', '_name', '_changed', '_backup') def __init__(self): super().__init__() self._meta = None self._name = None self._changed = None + self._backup = None @property def meta(self) -> Dict: @@ -213,7 +214,7 @@ def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=Fals self._changed = {n} else: 
self._changed.add(n) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() return n @@ -236,8 +237,9 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): else: self._changed.add(n) self._changed.add(m) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() + self.fix_stereo() def delete_atom(self, n: int, *, _skip_calculation=False): """ @@ -256,8 +258,9 @@ def delete_atom(self, n: int, *, _skip_calculation=False): self._changed = {m} else: self._changed.add(m) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() + self.fix_stereo() def delete_bond(self, n: int, m: int, *, _skip_calculation=False): """ @@ -274,8 +277,9 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): else: self._changed.add(n) self._changed.add(m) - if not _skip_calculation: + if not _skip_calculation and self._backup is None: self.fix_labels() + self.fix_stereo() def copy(self) -> 'MoleculeContainer': copy = super().copy() @@ -293,7 +297,8 @@ def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalculate_hydrogens=True, skip_neighbors_marks=False, skip_hybridizations_marks=False, skip_hydrogens_marks=False, - skip_rings_sizes_marks=False, skip_heteroatoms_marks=False) -> \ + skip_rings_sizes_marks=False, skip_heteroatoms_marks=False, skip_in_ring_bond_marks=False, + skip_stereo_marks=False) -> \ Union['MoleculeContainer', 'QueryContainer']: """ Create substructure containing atoms from atoms list. 
@@ -310,6 +315,8 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul :param skip_hydrogens_marks: Don't set hydrogens count marks on substructured queries :param skip_rings_sizes_marks: Don't set rings_sizes marks on substructured queries :param skip_heteroatoms_marks: Don't set heteroatoms count marks + :param skip_in_ring_bond_marks: Don't set in_ring bond marks + :param skip_stereo_marks: Don't set stereo marks on substructured queries """ if not atoms: raise ValueError('empty atoms list not allowed') @@ -317,97 +324,51 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul raise ValueError('invalid atom numbers') atoms = tuple(n for n in self._atoms if n in atoms) # save original order if as_query: - atom_type = QueryElement - bond_type = QueryBond sub = object.__new__(QueryContainer) - else: - atom_type = Element - bond_type = Bond - sub = object.__new__(self.__class__) - sub._MoleculeContainer__name = sub._MoleculeContainer__meta = None - - sa = self._atoms - sb = self._bonds - sc = self._charges - sr = self._radicals - - sub._charges = {n: sc[n] for n in atoms} - sub._radicals = {n: sr[n] for n in atoms} - sub._atoms = ca = {} + lost = {n for n, a in self._atoms.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure + # atoms with fully present neighbors + not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} + + # check for full presence of cumulene chains and terminal attachments + for p in self._stereo_cumulenes.values(): + if not not_skin.issuperset(p): + not_skin.difference_update(p) + + sub._atoms = {n: QueryElement.from_atom(self._atoms[n], + neighbors=not skip_neighbors_marks, + hybridization=not skip_hybridizations_marks, + hydrogens=not skip_hydrogens_marks, + ring_sizes=not skip_rings_sizes_marks, + heteroatoms=not skip_heteroatoms_marks, + stereo=not skip_stereo_marks and n in not_skin) + for n in atoms} + sub._bonds = sb = {} + for n in atoms: + sb[n] = 
sbn = {} + for m, bond in self._bonds[n].items(): + if m in sb: # bond partially exists. need back-connection. + sbn[m] = sb[m][n] + elif m in atoms: + sbn[m] = QueryBond.from_bond(bond, + in_ring=not skip_in_ring_bond_marks, + stereo=not skip_stereo_marks and n in not_skin and m in not_skin) + return sub + + # molecule substructure + sub = object.__new__(self.__class__) + sub._name = sub._meta = sub._changed = None + sub._atoms = {n: self._atoms[n].copy(hydrogens=not recalculate_hydrogens, stereo=True) for n in atoms} + sub._bonds = sb = {} for n in atoms: - ca[n] = atom = atom_type.from_atom(sa[n]) - atom._attach_graph(sub, n) - - sub._bonds = cb = {} - for n in atoms: - cb[n] = cbn = {} - for m, bond in sb[n].items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] + sb[n] = sbn = {} + for m, bond in self._bonds[n].items(): + if m in sb: # bond partially exists. need back-connection. + sbn[m] = sb[m][n] elif m in atoms: - cbn[m] = bond = bond_type.from_bond(bond) - if not as_query: - bond._attach_graph(sub, n, m) - - if as_query: - lost = {n for n, a in sa.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure - not_skin = {n for n in atoms if lost.isdisjoint(sb[n])} - sub._atoms_stereo = {n: s for n, s in self._atoms_stereo.items() if n in not_skin} - sub._allenes_stereo = {n: s for n, s in self._allenes_stereo.items() - if not_skin.issuperset(self._stereo_allenes_paths[n]) and - not_skin.issuperset(x for x in self._stereo_allenes[n] if x)} - sub._cis_trans_stereo = {nm: s for nm, s in self._cis_trans_stereo.items() - if not_skin.issuperset(self._stereo_cis_trans_paths[nm]) and - not_skin.issuperset(x for x in self._stereo_cis_trans[nm] if x)} - - sub._masked = {n: False for n in atoms} - if skip_heteroatoms_marks: - sub._heteroatoms = {n: () for n in atoms} - else: - sha = self.heteroatoms - sub._heteroatoms = {n: (sha(n),) for n in atoms} - - if skip_hybridizations_marks: - sub._hybridizations = {n: () for n 
in atoms} - else: - sh = self.hybridization - sub._hybridizations = {n: (sh(n),) for n in atoms} - if skip_neighbors_marks: - sub._neighbors = {n: () for n in atoms} - else: - sn = self.neighbors - sub._neighbors = {n: (sn(n),) for n in atoms} - if skip_hydrogens_marks: - sub._hydrogens = {n: () for n in atoms} - else: - shg = self._hydrogens - sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms} - if skip_rings_sizes_marks: - sub._rings_sizes = {n: () for n in atoms} - else: - rs = self.atoms_rings_sizes - sub._rings_sizes = {n: rs.get(n, ()) for n in atoms} - else: - sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers] - - if recalculate_hydrogens: - sub._hydrogens = {} - for n in atoms: - sub._calc_implicit(n) - else: - hg = self._hydrogens - sub._hydrogens = {n: hg[n] for n in atoms} - - sp = self._plane - sub._plane = {n: sp[n] for n in atoms} - sub._parsed_mapping = {n: m for n, m in self._parsed_mapping.items() if n in atoms} - - # fix_stereo will repair data - sub._atoms_stereo = self._atoms_stereo.copy() - sub._allenes_stereo = self._allenes_stereo.copy() - sub._cis_trans_stereo = self._cis_trans_stereo.copy() - sub.fix_stereo() + sbn[m] = bond.copy(stereo=True) + sub.fix_labels(recalculate_hydrogens=recalculate_hydrogens) + sub.fix_stereo() return sub def augmented_substructure(self, atoms: Iterable[int], deep: int = 1, **kwargs) -> 'MoleculeContainer': @@ -442,36 +403,29 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': """ if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') - sa = self._atoms - sb = self._bonds - bonds = [] adj = defaultdict(lambda: defaultdict(lambda: [None, None])) - - oa = other._atoms - ob = other._bonds - - common = sa.keys() & oa.keys() + common = self._atoms.keys() & other._atoms.keys() h = CGRContainer() ha = h._atoms hb = h._bonds - for n in sa.keys() - common: # cleavage atoms - ha[n] = DynamicElement.from_atom(sa[n]) + for n in 
self._atoms.keys() - common: # cleavage atoms + ha[n] = DynamicElement.from_atom(self._atoms[n]) hb[n] = {} - for m, bond in sb[n].items(): + for m, bond in self._bonds[n].items(): if m not in ha: if m in common: # bond to common atoms is broken bond bond = DynamicBond(bond.order, None) else: bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) - for n in oa.keys() - common: # coupling atoms - ha[n] = DynamicElement.from_atom(oa[n]) + for n in other._atoms.keys() - common: # coupling atoms + ha[n] = DynamicElement.from_atom(other._atoms[n]) hb[n] = {} - for m, bond in ob[n].items(): + for m, bond in other._bonds[n].items(): if m not in ha: if m in common: # bond to common atoms is formed bond bond = DynamicBond(None, bond.order) @@ -480,14 +434,14 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': bonds.append((n, m, bond)) for n in common: an = adj[n] - for m, bond in sb[n].items(): + for m, bond in self._bonds[n].items(): if m in common: an[m][0] = bond.order - for m, bond in ob[n].items(): + for m, bond in other._bonds[n].items(): if m in common: an[m][1] = bond.order for n in common: - ha[n] = DynamicElement.from_atoms(sa[n], oa[n]) + ha[n] = DynamicElement.from_atoms(self._atoms[n], other._atoms[n]) hb[n] = {} for m, (o1, o2) in adj[n].items(): @@ -916,7 +870,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): else: # update internal state self.fix_labels() self.fix_stereo() - del self._backup + self._backup = None # drop backup __all__ = ['MoleculeContainer'] diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 19b5e66b..fc26c962 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -107,6 +107,9 @@ def copy(self, full=False): if full: copy._masked = self.masked copy._stereo = self.stereo + else: + copy._masked = False + copy._stereo = None return copy def __copy__(self): @@ -414,7 +417,7 @@ def from_atomic_number(cls, number: int) -> 
Type['QueryElement']: @classmethod def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, heteroatoms=False, - hydrogens=False, ring_sizes=False) -> 'QueryElement': + hydrogens=False, ring_sizes=False, stereo=False) -> 'QueryElement': """ get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement """ @@ -427,15 +430,17 @@ def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, hetero query._is_radical = atom.is_radical if neighbors: - query._neighbors == (atom.neighbors,) + query._neighbors = (atom.neighbors,) if hybridization: - query._hybridization == (atom.hybridization,) + query._hybridization = (atom.hybridization,) if heteroatoms: query._heteroatoms = (atom.heteroatoms,) if ring_sizes: query._ring_sizes = atom.ring_sizes if hydrogens and atom.implicit_hydrogens is not None: query._implicit_hydrogens = (atom.implicit_hydrogens,) + if stereo: + query._stereo = atom.stereo return query def copy(self, full=False): From fb08fdc75d6f287bfade1b0ce35386e67a3be236 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 16:01:19 +0100 Subject: [PATCH 06/51] Refactor stereo and chemical attributes handling Centralize chemical attributes like charge and radicals within the `atom` object. Simplify stereo data management by directly setting stereochemistry on atom and bond objects and remove unnecessary lookups. Add `ExtendedQuery` to public API and streamline related imports. 
--- chython/algorithms/isomorphism.py | 2 +- chython/algorithms/smiles.py | 132 +++++++++++-------------- chython/algorithms/stereo/graph.py | 14 +-- chython/periodictable/base/__init__.py | 3 +- chython/periodictable/base/query.py | 31 +++--- 5 files changed, 85 insertions(+), 97 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 76791e70..e2d95da3 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -22,7 +22,7 @@ from itertools import permutations from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product -from ..periodictable.element import Element, Query, AnyElement, AnyMetal, ListElement +from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement if TYPE_CHECKING: diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index e4b8dfdd..412c76e0 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -26,6 +26,7 @@ from itertools import product from random import random from typing import Callable, Optional, Tuple, TYPE_CHECKING, Union +from ..periodictable import ExtendedQuery, QueryElement if TYPE_CHECKING: @@ -382,15 +383,11 @@ def _smiles_order(self: 'MoleculeContainer', stereo=True) -> Callable: def _format_cxsmiles(self: 'MoleculeContainer', order): if self.is_radical: - radical = self._radicals - return f'|^1:{",".join(str(n) for n, m in enumerate(order) if radical[m])}|' + return f'|^1:{",".join(str(n) for n, m in enumerate(order) if self._atoms[m].is_radical)}|' return def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - ih = self._hydrogens[n] - hyb = self.hybridization(n) smi = ['', # [ str(atom.isotope) if atom.isotope else '', # isotope @@ -401,50 +398,51 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): f':{n}' if kwargs.get('mapping', False) else '', # 
mapping ''] # ] - if kwargs.get('stereo', True): - if n in self._atoms_stereo: - if ih and next(x for x in adjacency) == n: # first atom in smiles has reversed chiral mark - smi[3] = '@@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@' - else: - smi[3] = '@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@@' - elif n in self._allenes_stereo: + if atom.stereo is not None and kwargs.get('stereo', True): + # allene + if n in self._stereo_allenes_terminals: t1, t2 = self._stereo_allenes_terminals[n] env = self._stereo_allenes[n] n1 = next(x for x in adjacency[t1] if x in env) n2 = next(x for x in adjacency[t2] if x in env) smi[3] = '@' if self._translate_allene_sign(n, n1, n2) else '@@' - elif charge and kwargs.get('charges', True): - smi[5] = charge_str[charge] - elif charge and kwargs.get('charges', True): - smi[5] = charge_str[charge] + # tetrahedron + elif atom.implicit_hydrogens and next(x for x in adjacency) == n: + # first atom in smiles has reversed chiral mark + smi[3] = '@@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@' + else: + smi[3] = '@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@@' + + if atom.charge and kwargs.get('charges', True): + smi[5] = charge_str[atom.charge] - if any(smi) or atom.atomic_symbol not in organic_set or self._radicals[n] or kwargs.get('hydrogens', False): + if any(smi) or atom.atomic_symbol not in organic_set or atom.is_radical or kwargs.get('hydrogens', False): smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' - elif ih: - smi[4] = f'H{ih}' - elif hyb == 4 and ih and atom.atomic_number in (5, 7, 15): # pyrrole + elif atom.implicit_hydrogens: + smi[4] = f'H{atom.implicit_hydrogens}' + elif atom.hybridization == 4 and atom.implicit_hydrogens and atom.atomic_number in (5, 7, 15): # pyrrole smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' else: - smi[4] = f'H{ih}' - elif not ih and atom.atomic_number 
in (5, 6, 15, 16) and not self.not_special_connectivity[n]: + smi[4] = f'H{atom.implicit_hydrogens}' + elif not atom.implicit_hydrogens and atom.atomic_number in (5, 6, 15, 16) and not self.not_special_connectivity[n]: # elemental B, C, P, S smi[0] = '[' smi[-1] = ']' - elif ih and atom.atomic_number == 15 and hyb != 1: + elif atom.implicit_hydrogens and atom.atomic_number == 15 and atom.hybridization != 1: smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' else: - smi[4] = f'H{ih}' + smi[4] = f'H{atom.implicit_hydrogens}' - if kwargs.get('aromatic', True) and hyb == 4: + if kwargs.get('aromatic', True) and atom.hybridization == 4: smi[2] = atom.atomic_symbol.lower() else: smi[2] = atom.atomic_symbol @@ -453,14 +451,13 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' - bonds = self._bonds - order = bonds[n][m].order + order = self._bonds[n][m].order if order == 4: if kwargs.get('aromatic', True): return '' return ':' elif order == 1: # cis-trans /\ - if kwargs.get('aromatic', True) and self.hybridization(n) == self.hybridization(m) == 4: + if kwargs.get('aromatic', True) and self._atoms[n].hybridization == self._atoms[m].hybridization == 4: return '-' if kwargs.get('stereo', True): if 'cache' in adjacency: @@ -531,19 +528,15 @@ class CGRSmiles(Smiles): def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - is_radical = self._radicals[n] - p_charge = self._p_charges[n] - p_is_radical = self._p_radicals[n] if atom.isotope: smi = [str(atom.isotope), atom.atomic_symbol] else: smi = [atom.atomic_symbol] - if charge or p_charge: - smi.append(dyn_charge_str[(charge, p_charge)]) - if is_radical or p_is_radical: - smi.append(dyn_radical_str[(is_radical, p_is_radical)]) + if atom.charge or atom.p_charge: + 
smi.append(dyn_charge_str[(atom.charge, atom.p_charge)]) + if atom.is_radical or atom.p_is_radical: + smi.append(dyn_radical_str[(atom.is_radical, atom.p_is_radical)]) if len(smi) != 1 or atom.atomic_symbol not in organic_set: smi.insert(0, '[') @@ -559,22 +552,19 @@ class QuerySmiles(Smiles): __slots__ = () def _format_cxsmiles(self: 'QueryContainer', order): - hybridization = self._hybridizations - heteroatoms = self._heteroatoms - masked = self._masked - radical = self._radicals - hh = ['atomProp'] cx = [] - if any(radical.values()): - cx.append(f'^1:{",".join(str(n) for n, m in enumerate(order) if radical[m])}') + rad = [str(n) for n, m in enumerate(order) if isinstance(a:=self._atoms[m], ExtendedQuery) and a.is_radical] + if rad: + cx.append('^1:' + ','.join(rad)) for n, m in enumerate(order): - if len(hb := hybridization[m]) > 1 or (hb and hb[0] != 4): - hh.append(f'{n}.hyb.{"".join(hybridization_str[x] for x in hb)}') - if ha := heteroatoms[m]: - hh.append(f'{n}.het.{"".join(str(x) for x in ha)}') - if masked[m]: + atom = self._atoms[m] + if len(hb := atom.hybridization) > 1 or (hb and hb[0] != 4): + hh.append(f'{n}.hyb.' + ''.join(hybridization_str[x] for x in hb)) + if isinstance(atom, ExtendedQuery) and (ha := atom.heteroatoms): + hh.append(f'{n}.het.' + ''.join(str(x) for x in ha)) + if atom.masked: hh.append(f'{n}.msk.1') if len(hh) > 1: cx.append(':'.join(hh)) @@ -583,42 +573,36 @@ def _format_cxsmiles(self: 'QueryContainer', order): def _format_atom(self: 'QueryContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - hybridization = self._hybridizations[n] - neighbors = self._neighbors[n] - hydrogens = self._hydrogens[n] - rings = self._rings_sizes[n] - - if atom.isotope: + if isinstance(atom, QueryElement) and atom.isotope: smi = ['[', str(atom.isotope), atom.atomic_symbol] else: smi = ['[', atom.atomic_symbol] - if n in self._atoms_stereo: # mark atom as chiral. 
it's too difficult to set correct sign - smi.append(';@?') - if n in self._allenes_stereo: - smi.append(';@?') + if isinstance(atom, ExtendedQuery): + if atom.stereo is not None: + # mark atom as chiral. it's too difficult to set correct sign + smi.append(';@?') - if charge: - smi.append(';') - smi.append(charge_str[charge]) + if atom.charge: + smi.append(';') + smi.append(charge_str[atom.charge]) - if hydrogens: # h implicit-H-count implicit hydrogens - smi.append(';') - smi.append(','.join(f'h{x}' for x in hydrogens)) + if atom.implicit_hydrogens: # h implicit-H-count implicit hydrogens + smi.append(';') + smi.append(','.join(f'h{x}' for x in atom.implicit_hydrogens)) - if neighbors: # D degree explicit connections + if atom.neighbors: # D degree explicit connections smi.append(';') - smi.append(','.join(f'D{x}' for x in neighbors)) + smi.append(','.join(f'D{x}' for x in atom.neighbors)) - if rings: + if isinstance(atom, ExtendedQuery) and atom.ring_sizes: smi.append(';') - if rings[0]: - smi.append(','.join(f'r{x}' for x in rings)) + if atom.ring_sizes[0]: + smi.append(','.join(f'r{x}' for x in atom.ring_sizes)) else: smi.append('!R') - if len(hybridization) == 1 and hybridization[0] == 4: # only aromatic. other marks in cx extension + if len(atom.hybridization) == 1 and atom.hybridization[0] == 4: # only aromatic. 
other marks in cx extension smi.append(';a') smi.append(']') diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index 01dbd26e..6fe91b76 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -72,12 +72,10 @@ def tetrahedrons(self: 'Container') -> Tuple[int, ...]: """ atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals tetra = [] for n, atom in atoms.items(): - if atom.atomic_number == 6 and not charges[n] and not radicals[n]: + if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: env = bonds[n] if all(int(x) == 1 for x in env.values()): if sum(int(x) for x in env.values()) > 4: @@ -89,9 +87,11 @@ def clean_stereo(self: 'Container'): """ Remove stereo data. """ - self._atoms_stereo.clear() - self._allenes_stereo.clear() - self._cis_trans_stereo.clear() + for a in self._atoms.values(): + a._stereo = None + for _, bs in self._bonds: + for b in bs.values(): + b._stereo = None # flush twice, but it should be still faster self.flush_cache() def get_mapping(self: 'Container', other: 'Container', **kwargs): @@ -156,7 +156,7 @@ def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): :param s: if None, use existing sign else translate given to molecule """ if s is None: - s = self._atoms_stereo[n] + s = self._atoms[n].stereo order = self._stereo_tetrahedrons[n] if len(order) == 3: diff --git a/chython/periodictable/base/__init__.py b/chython/periodictable/base/__init__.py index f8ca87e8..75806828 100644 --- a/chython/periodictable/base/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -21,4 +21,5 @@ from .query import * -__all__ = ['Element', 'DynamicElement', 'Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'Query', 'ExtendedQuery', + 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/base/query.py 
b/chython/periodictable/base/query.py index fc26c962..325c0947 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -47,13 +47,17 @@ def _validate(value, prop): class Query(ABC): - __slots__ = ('_neighbors', '_hybridization', '_masked', '_stereo') + __slots__ = ('_neighbors', '_hybridization', '_masked') def __init__(self): self._neighbors = () self._hybridization = () self._masked = False - self._stereo = None + + @property + @abstractmethod + def atomic_symbol(self) -> str: + ... @property def neighbors(self) -> Tuple[int, ...]: @@ -96,20 +100,12 @@ def masked(self, value): raise TypeError('masked should be bool') self._masked = value - @property - def stereo(self): - return self._stereo - def copy(self, full=False): copy = object.__new__(self.__class__) copy._neighbors = self.neighbors copy._hybridization = self.hybridization - if full: - copy._masked = self.masked - copy._stereo = self.stereo - else: - copy._masked = False - copy._stereo = None + + copy._masked = self.masked if full else False return copy def __copy__(self): @@ -120,7 +116,7 @@ def __repr__(self): class ExtendedQuery(Query, ABC): - __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens') + __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens', '_stereo') def __init__(self): super().__init__() @@ -129,6 +125,7 @@ def __init__(self): self._heteroatoms = () self._ring_sizes = () self._implicit_hydrogens = () + self._stereo = None @property def charge(self) -> int: @@ -200,6 +197,10 @@ def ring_sizes(self, value): else: raise TypeError('rings should be int or list or tuple of ints') + @property + def stereo(self): + return self._stereo + def copy(self, full=False): copy = super().copy(full=full) copy._charge = self.charge @@ -207,6 +208,8 @@ def copy(self, full=False): copy._heteroatoms = self.heteroatoms copy._implicit_hydrogens = self.implicit_hydrogens copy._ring_sizes = 
self.ring_sizes + + copy._stereo = self.stereo if full else None return copy @@ -499,4 +502,4 @@ def __hash__(self): self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) -__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Query', 'ExtendedQuery', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] From 04ef91e9f42219c8efaca4fecb8c972b25c2e525 Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 1 Nov 2024 16:16:41 +0100 Subject: [PATCH 07/51] kekule adapted --- chython/algorithms/aromatics/kekule.py | 141 ++++++++++++------------- 1 file changed, 67 insertions(+), 74 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index ef9834e9..de51744b 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -46,7 +46,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool bonds = self._bonds atoms = set() for n, m, b in kekule: - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b atoms.add(n) atoms.add(m) for n in atoms: @@ -65,7 +65,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): bonds = copy._bonds atoms = set() for n, m, b in form: - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b atoms.add(n) atoms.add(m) for n in atoms: @@ -73,8 +73,8 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): yield copy def __fix_rings(self: 'MoleculeContainer'): + atoms = self._atoms bonds = self._bonds - charges = self._charges seen = set() for q, af, bf, mm in rules: for mapping in q.get_mapping(self, automorphism_filter=False): @@ -85,11 +85,11 @@ def __fix_rings(self: 'MoleculeContainer'): for n, c in af.items(): n = mapping[n] - charges[n] = c + atoms[n]._charge = c for n, m, b in bf: n = mapping[n] m = mapping[m] - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b if seen: self.flush_cache() return True @@ -97,11 +97,7 
@@ def __fix_rings(self: 'MoleculeContainer'): def __prepare_rings(self: 'MoleculeContainer'): atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - hydrogens = self._hydrogens - neighbors = self.neighbors rings = defaultdict(list) # aromatic skeleton pyrroles = set() @@ -168,133 +164,130 @@ def __prepare_rings(self: 'MoleculeContainer'): if any(len(rings[n]) != 2 for n in double_bonded): # double bonded never condensed raise InvalidAromaticRing('quinone valence error') for n in double_bonded: - if atoms[n].atomic_number == 7: - if charges[n] != 1: + atom = atoms[n] + if atom.atomic_number == 7: + if atom.charge != 1: raise InvalidAromaticRing('quinone should be charged N atom') - elif atoms[n].atomic_number not in (6, 15, 16, 33, 34, 52) or charges[n]: + elif atom.atomic_number not in (6, 15, 16, 33, 34, 52) or atom.charge: raise InvalidAromaticRing('quinone should be neutral S, Se, Te, C, P, As atom') for n in rings: - an = atoms[n].atomic_number - ac = charges[n] - ab = neighbors(n) - if an == 6: # carbon - if ac == 0: - if ab not in (2, 3): + atom = atoms[n] + if atom.atomic_number == 6: # carbon + if atom.charge == 0: + if atom.neighbors not in (2, 3): raise InvalidAromaticRing - elif ac in (-1, 1): - if radicals[n]: - if ab == 2: + elif atom.charge in (-1, 1): + if atom.is_radical: + if atom.neighbors == 2: double_bonded.add(n) else: raise InvalidAromaticRing - elif ab == 3: + elif atom.neighbors == 3: double_bonded.add(n) - elif ab == 2: # benzene (an|cat)ion or pyrrole + elif atom.neighbors == 2: # benzene (an|cat)ion or pyrrole pyrroles.add(n) else: raise InvalidAromaticRing else: raise InvalidAromaticRing - elif an in (7, 15, 33): - if ac == 0: # pyrrole or pyridine. include radical pyrrole - if radicals[n]: - if ab != 2: # only pyrrole radical + elif atom.atomic_number in (7, 15, 33): + if atom.charge == 0: # pyrrole or pyridine. 
include radical pyrrole + if atom.is_radical: + if atom.neighbors != 2: # only pyrrole radical raise InvalidAromaticRing double_bonded.add(n) - elif ab == 3: - if an == 7: # pyrrole only possible + elif atom.neighbors == 3: + if atom.atomic_number == 7: # pyrrole only possible double_bonded.add(n) else: # P(III) or P(V)H pyrroles.add(n) - elif ab == 2: - ah = hydrogens[n] - if ah is None: # pyrrole or pyridine + elif atom.neighbors == 2: + if atom.implicit_hydrogens is None: # pyrrole or pyridine pyrroles.add(n) - elif ah == 1: # only pyrrole + elif atom.implicit_hydrogens == 1: # only pyrrole double_bonded.add(n) - elif ah: # too many hydrogens for aromatic rings + elif atom.implicit_hydrogens: # too many hydrogens for aromatic rings raise InvalidAromaticRing - elif ab != 4 or an not in (15, 33): # P(V) in ring [P;a](-R1)-R2 + elif atom.neighbors != 4 or atom.atomic_number not in (15, 33): # P(V) in ring [P;a](-R1)-R2 raise InvalidAromaticRing - elif ac == -1: # pyrrole only - if ab != 2 or radicals[n]: + elif atom.charge == -1: # pyrrole only + if atom.neighbors != 2 or atom.is_radical: raise InvalidAromaticRing double_bonded.add(n) - elif ac != 1: + elif atom.charge != 1: raise InvalidAromaticRing - elif radicals[n]: - if ab != 2: # not cation-radical pyridine + elif atom.is_radical: + if atom.neighbors != 2: # not cation-radical pyridine raise InvalidAromaticRing - elif ab == 2: # pyrrole cation or protonated pyridine + elif atom.neighbors == 2: # pyrrole cation or protonated pyridine pyrroles.add(n) - elif ab != 3: # not pyridine oxyde + elif atom.neighbors != 3: # not pyridine oxyde raise InvalidAromaticRing - elif an == 8: # furan - if ab == 2: - if ac == 0: - if radicals[n]: + elif atom.atomic_number == 8: # furan + if atom.neighbors == 2: + if atom.charge == 0: + if atom.is_radical: raise InvalidAromaticRing('radical oxygen') double_bonded.add(n) - elif ac == 1: - if radicals[n]: # furan cation-radical + elif atom.charge == 1: + if atom.is_radical: # furan 
cation-radical double_bonded.add(n) # pyrylium else: raise InvalidAromaticRing('invalid oxygen charge') else: raise InvalidAromaticRing('Triple-bonded oxygen') - elif an in (16, 34, 52): # thiophene + elif atom.atomic_number in (16, 34, 52): # thiophene if n not in double_bonded: # not sulphoxyde nor sulphone - if ab == 2: - if radicals[n]: - if ac == 1: + if atom.neighbors == 2: + if atom.is_radical: + if atom.charge == 1: double_bonded.add(n) else: raise InvalidAromaticRing('S, Se, Te cation-radical expected') - if ac == 0: + if atom.charge == 0: double_bonded.add(n) - elif ac != 1: + elif atom.charge != 1: raise InvalidAromaticRing('S, Se, Te cation in benzene like ring expected') - elif ab == 3: - if radicals[n]: - if ac: + elif atom.neighbors == 3: + if atom.is_radical: + if atom.charge: raise InvalidAromaticRing('S, Se, Te ion-radical ring') double_bonded.add(n) - elif ac == 1: + elif atom.charge == 1: double_bonded.add(n) - elif ac: + elif atom.charge: raise InvalidAromaticRing('S, Se, Te invalid charge ring') else: raise InvalidAromaticRing('S, Se, Te hypervalent ring') - elif an == 5: # boron - if ac == 0: - if ab == 2: - if radicals[n]: # C=1O[B]OC=1 + elif atom.atomic_number == 5: # boron + if atom.charge == 0: + if atom.neighbors == 2: + if atom.is_radical: # C=1O[B]OC=1 double_bonded.add(n) else: - ah = hydrogens[n] - if ah is None: # b1ccccc1, C=1OBOC=1 or B1C=CC=N1 + if atom.implicit_hydrogens is None: # b1ccccc1, C=1OBOC=1 or B1C=CC=N1 pyrroles.add(n) - elif ah == 1: # C=1O[BH]OC=1 or [BH]1C=CC=N1 + elif atom.implicit_hydrogens == 1: # C=1O[BH]OC=1 or [BH]1C=CC=N1 double_bonded.add(n) - elif ah: + elif atom.implicit_hydrogens: raise InvalidAromaticRing - elif not radicals[n]: + elif not atom.is_radical: double_bonded.add(n) else: raise InvalidAromaticRing - elif ac == 1: - if ab == 2 and not radicals[n]: + elif atom.charge == 1: + if atom.neighbors == 2 and not atom.is_radical: double_bonded.add(n) else: raise InvalidAromaticRing - elif ac == -1: - 
if ab == 2: - if not radicals[n]: # C=1O[B-]OC=1 or [bH-]1ccccc1 + elif atom.charge == -1: + if atom.neighbors == 2: + if not atom.is_radical: # C=1O[B-]OC=1 or [bH-]1ccccc1 pyrroles.add(n) # anion-radical is benzene like - elif radicals[n]: # C=1O[B-*](R)OC=1 + elif atom.is_radical: # C=1O[B-*](R)OC=1 double_bonded.add(n) else: pyrroles.add(n) From e5a2eaede1c3c138ea037d68dce013d7a5e403a7 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 2 Nov 2024 14:11:12 +0100 Subject: [PATCH 08/51] molecule constructor refactored --- chython/containers/molecule.py | 58 +---------- chython/files/_convert.py | 137 ++++++++++++++++++++------ chython/files/daylight/smiles.py | 108 +++----------------- chython/files/daylight/tokenize.py | 15 ++- chython/periodictable/base/element.py | 18 ++-- 5 files changed, 142 insertions(+), 194 deletions(-) diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 40205489..09fa158a 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -44,7 +44,7 @@ class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, X3domMolecule): - __slots__ = ('_meta', '_name', '_changed', '_backup') + __slots__ = ('_meta', '_name', '_conformers', '_changed', '_backup') def __init__(self): super().__init__() @@ -93,52 +93,6 @@ def environment(self, atom: int, include_bond: bool = True, include_atom: bool = return tuple(self._bonds[atom].items()) return tuple(self._bonds[atom]) - def neighbors(self, n: int) -> int: - """number of neighbors atoms excluding any-bonded""" - return self._atoms[n].neighbors - - @cached_args_method - def hybridization(self, n: int) -> int: - """ - Atom hybridization. 
- - 1 - if atom has zero or only single bonded neighbors, 2 - if has only one double bonded neighbor and any amount - of single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or - two and more double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. - """ - return self._atoms[n].hybridization - - @cached_args_method - def heteroatoms(self, n: int) -> int: - """ - Number of neighbored heteroatoms (not carbon or hydrogen) except any-bond connected. - """ - return self._atoms[n].heteroatoms - - def implicit_hydrogens(self, n: int) -> Optional[int]: - """ - Number of implicit hydrogen atoms connected to atom. - - Returns None if count are ambiguous. - """ - return self._atoms[n].implicit_hydrogens - - def explicit_hydrogens(self, n: int) -> int: - """ - Number of explicit hydrogen atoms connected to atom. - - Take into account any type of bonds with hydrogen atoms. - """ - return self._atoms[n].explicit_hydrogens - - def total_hydrogens(self, n: int) -> int: - """ - Number of hydrogen atoms connected to atom. - - Take into account any type of bonds with hydrogen atoms. 
- """ - return self._atoms[n].total_hydrogens - @cached_args_method def adjacency_matrix(self, set_bonds=False, /): """ @@ -743,8 +697,7 @@ def _calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules """ - atoms = self._atoms - atom = atoms[n] + atom = self._atoms[n] if atom.atomic_number == 1: # hydrogen nether has implicit H atom._implicit_hydrogens = 0 return @@ -762,7 +715,7 @@ def _calc_implicit(self, n: int): return elif order != 8: # any bond used for complexes explicit_sum += order - explicit_dict[(order, atoms[m].atomic_number)] += 1 + explicit_dict[(order, self._atoms[m].atomic_number)] += 1 if aroma == 2: if explicit_sum == 0: # H-Ar @@ -794,8 +747,7 @@ def _calc_implicit(self, n: int): atom._implicit_hydrogens = None # rule not found def _check_implicit(self, n: int, h: int) -> bool: - atoms = self._atoms - atom = atoms[n] + atom = self._atoms[n] if atom.atomic_number == 1: # hydrogen nether has implicit H return h == 0 @@ -808,7 +760,7 @@ def _check_implicit(self, n: int, h: int) -> bool: return False elif order != 8: # any bond used for complexes explicit_sum += order - explicit_dict[(order, atoms[m].atomic_number)] += 1 + explicit_dict[(order, self._atoms[m].atomic_number)] += 1 try: rules = atom.valence_rules(explicit_sum) diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 2de1ff2b..819389e1 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023, 2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -22,31 +22,27 @@ from ..periodictable import Element -def create_molecule(data, *, skip_calc_implicit=False, ignore_bad_isotopes=False, _cls=MoleculeContainer): - g = object.__new__(_cls) - pm = {} - atoms = {} - plane = {} - charges = {} - radicals = {} - bonds = {} +def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False, + keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, + ignore_carbon_radicals=False, _cls=MoleculeContainer): + g = _cls() + atoms = g._atoms + bonds = g._bonds mapping = data['mapping'] for n, atom in enumerate(data['atoms']): + if abs(atom['charge']) > 4: + raise ValueError('formal charge should be in range [-4, 4]') n = mapping[n] - e = Element.from_symbol(atom['element']) + e = Element.from_symbol(atom.pop('element')) try: - atoms[n] = e(atom['isotope']) + atoms[n] = e(**atom) except ValueError: if not ignore_bad_isotopes: raise - atoms[n] = e() # reset isotope mark on errors. + del atom['isotope'] # reset isotope mark on errors. 
+ atoms[n] = e(**atom) bonds[n] = {} - if (charge := atom['charge']) > 4 or charge < -4: - raise ValueError('formal charge should be in range [-4, 4]') - charges[n] = charge - radicals[n] = atom['is_radical'] - plane[n] = (atom['x'], atom['y']) - pm[n] = atom['mapping'] + for n, m, b in data['bonds']: n, m = mapping[n], mapping[m] if n == m: @@ -57,26 +53,108 @@ def create_molecule(data, *, skip_calc_implicit=False, ignore_bad_isotopes=False raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) if any(a['z'] for a in data['atoms']): - conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] - else: - conformers = [] + # store conformer + g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] if data['log']: # store log to the meta if data['meta'] is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] + g._meta = data['meta'] - g.__setstate__({'atoms': atoms, 'bonds': bonds, 'meta': data['meta'], 'plane': plane, 'parsed_mapping': pm, - 'charges': charges, 'radicals': radicals, 'name': data['title'], 'conformers': conformers, - 'atoms_stereo': {}, 'allenes_stereo': {}, 'cis_trans_stereo': {}, 'hydrogens': {}}) - if not skip_calc_implicit: - for n in atoms: + if skip_calc_implicit: # don't calc Hs. e.g. INCHI + return g + + implicit_mismatch = {} + radicalized = [] + # precalculate Hs + for n, a in atoms.items(): + if a.implicit_hydrogens is None: + # let's try to calculate. in case of errors just keep as is. radicals in smiles should be in [brackets], + # thus has implicit Hs value g._calc_implicit(n) + elif keep_implicit: + # keep given Hs count as is + continue + else: # recheck given Hs count + h = a.implicit_hydrogens # parsed Hs + g._calc_implicit(n) # recalculate + if a.implicit_hydrogens is None: # atom has invalid valence or aromatic ring. + if a.hybridization == 4: + # this is aromatic ring. just restore given H count. 
+ a._implicit_hydrogens = h + # rare H0 case + if (not keep_radicals and not ignore_aromatic_radicals + and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and sum(b.order != 8 for b in bonds[n].values()) == 2): + # c[c]c - aromatic B,C,N,P radical + a._is_radical = True + radicalized.append(n) + elif not keep_radicals and not a.is_radical: # CXSMILES radical not set. + # SMILES doesn't code radicals. so, let's try to guess. + a._is_radical = True + if g._check_implicit(n, h): # radical form is valid + radicalized.append(n) + a._implicit_hydrogens = h + elif ignore: # radical state also has errors. + a._is_radical = False # reset radical state + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif h != a.implicit_hydrogens: # H count mismatch. + if a.hybridization == 4: + if (not keep_radicals + and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and sum(b.order != 8 for b in bonds[n].values()) == 2): + # c[c]c - aromatic B,C,N,P radical + a._implicit_hydrogens = 0 + a._is_radical = True + radicalized.append(n) + elif ignore: + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif g._check_implicit(n, h): # set another possible implicit state. probably Al, P + a._implicit_hydrogens = h + elif not keep_radicals and not a.is_radical: # CXSMILES radical is not set. try radical form + a._is_radical = True + if g._check_implicit(n, h): + a._implicit_hydrogens = h + radicalized.append(n) + # radical state also has errors. 
+ elif ignore: + a._is_radical = False # reset radical state + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif ignore: # just ignore it + implicit_mismatch[n] = h + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + + if ignore_carbon_radicals: + for n in radicalized: + a = atoms[n] + if a.atomic_number == 6: + a._is_radical = False + a._implicit_hydrogens += 1 + data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') + elif radicalized: + g.meta['chython_radicalized_atoms'] = radicalized + if data['log'] and 'chython_parsing_log' not in g.meta: + g.meta['chython_parsing_log'] = data['log'] + if implicit_mismatch: + g.meta['chython_implicit_mismatch'] = implicit_mismatch return g def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_isotopes=False, - _r_cls=ReactionContainer, _m_cls=MoleculeContainer): + keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, + ignore_carbon_radicals=False, _r_cls=ReactionContainer, _m_cls=MoleculeContainer): rc, pr, rg = [], [], [] for ms, pms, gr in ((rc, data['reactants'], 'reactant'), (pr, data['products'], 'products'), @@ -85,7 +163,10 @@ def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_i for n, m in enumerate(pms): try: ms.append(create_molecule(m, skip_calc_implicit=skip_calc_implicit, - ignore_bad_isotopes=ignore_bad_isotopes, _cls=_m_cls)) + ignore_bad_isotopes=ignore_bad_isotopes, keep_implicit=keep_implicit, + keep_radicals=keep_radicals, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + ignore_carbon_radicals=ignore_carbon_radicals, _cls=_m_cls)) except ValueError as e: if not ignore: raise diff --git 
a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 2271a052..d491c866 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -143,11 +143,12 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: atom_map[x]['is_radical'] = True postprocess_parsed_reaction(record, remap=remap, ignore=ignore) - rxn = create_reaction(record, ignore_bad_isotopes=ignore_bad_isotopes, _r_cls=_r_cls, _m_cls=_m_cls) + rxn = create_reaction(record, ignore_bad_isotopes=ignore_bad_isotopes, keep_radicals=False, + ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + _r_cls=_r_cls, _m_cls=_m_cls) for mol, tmp in zip(rxn.molecules(), chain(record['reactants'], record['reagents'], record['products'])): - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, - ignore_aromatic_radicals=ignore_aromatic_radicals) + postprocess_molecule(mol, tmp, ignore_stereo=ignore_stereo) return rxn else: record = parser(smiles_tokenize(smi), not ignore) @@ -156,104 +157,17 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: record['log'].extend(log) postprocess_parsed_molecule(record, remap=remap, ignore=ignore) - mol = create_molecule(record, ignore_bad_isotopes=ignore_bad_isotopes, _cls=_m_cls) - postprocess_molecule(mol, record, ignore=ignore, ignore_stereo=ignore_stereo, - ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, - ignore_aromatic_radicals=ignore_aromatic_radicals) + mol = create_molecule(record, ignore_bad_isotopes=ignore_bad_isotopes, keep_radicals=False, + ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + _cls=_m_cls) + postprocess_molecule(mol, record, 
ignore_stereo=ignore_stereo) return mol -def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, ignore_carbon_radicals=False, - keep_implicit=False, ignore_aromatic_radicals=True): +def postprocess_molecule(molecule, data, *, ignore_stereo=False): mapping = data['mapping'] - atoms = molecule._atoms - bonds = molecule._bonds - charges = molecule._charges - hydrogens = molecule._hydrogens - radicals = molecule._radicals - hyb = molecule.hybridization - radicalized = [] - - implicit_mismatch = {} - if 'chython_parsing_log' in molecule.meta: - log = molecule.meta['chython_parsing_log'] - else: - log = [] - - for n, a in enumerate(data['atoms']): - h = a['hydrogen'] - if h is None: # simple atom token - continue - # bracket token should always contain implicit hydrogens count. - n = mapping[n] - if keep_implicit: # override any calculated hydrogens count. - hydrogens[n] = h - elif (hc := hydrogens[n]) is None: # atom has invalid valence or aromatic ring. - if hyb(n) == 4: # this is aromatic rings. just store given H count. - hydrogens[n] = h - # rare H0 case - if (not ignore_aromatic_radicals and not h and not charges[n] and not radicals[n] and - atoms[n].atomic_number in (5, 6, 7, 15) and sum(b.order != 8 for b in bonds[n].values()) == 2): - # c[c]c - aromatic B,C,N,P radical - radicals[n] = True - radicalized.append(n) - elif not radicals[n]: # CXSMILES radical not set. - # SMILES doesn't code radicals. so, let's try to guess. - radicals[n] = True - if molecule._check_implicit(n, h): # radical form is valid - radicalized.append(n) - hydrogens[n] = h - elif ignore: # radical state also has errors. - radicals[n] = False # reset radical state - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif hc != h: # H count mismatch. 
- if hyb(n) == 4: - if not h and not charges[n] and not radicals[n] and atoms[n].atomic_number in (5, 6, 7, 15) and \ - sum(b.order != 8 for b in bonds[n].values()) == 2: - # c[c]c - aromatic B,C,N,P radical - hydrogens[n] = 0 - radicals[n] = True - radicalized.append(n) - elif ignore: - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif molecule._check_implicit(n, h): # set another possible implicit state. probably Al, P - hydrogens[n] = h - elif not radicals[n]: # CXSMILES radical is not set. try radical form - radicals[n] = True - if molecule._check_implicit(n, h): - hydrogens[n] = h - radicalized.append(n) - # radical state also has errors. - elif ignore: - radicals[n] = False # reset radical state - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif ignore: # just ignore it - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - - if ignore_carbon_radicals: - for n in radicalized: - if atoms[n].atomic_number == 6: - radicals[n] = False - hydrogens[n] += 1 - log.append(f'carbon radical {n} replaced with implicit hydrogen') - - if implicit_mismatch: - molecule.meta['chython_implicit_mismatch'] = implicit_mismatch - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log if ignore_stereo: return diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index 645d87e9..6bf1eb8c 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 
2022, 2023 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from re import compile, fullmatch, match, search +from re import compile, match, search from .._mdl import common_isotopes from ...containers.bonds import QueryBond from ...exceptions import IncorrectSmiles, IncorrectSmarts @@ -243,7 +243,7 @@ def _tokenize(smiles): def _atom_parse(token): # [isotope]Element[element][@[@]][H[n]][+-charge][:mapping] - _match = fullmatch(atom_re, token) + _match = atom_re.fullmatch(token) if _match is None: raise IncorrectSmiles(f'atom token invalid {token}') isotope, element, stereo, hydrogen, charge, mapping = _match.groups() @@ -275,16 +275,14 @@ def _atom_parse(token): mapping = int(mapping[1:]) except ValueError: raise IncorrectSmiles('invalid mapping token') - else: - mapping = 0 if element in ('c', 'n', 'o', 'p', 's', 'as', 'se', 'b', 'te'): _type = 8 element = element.capitalize() else: _type = 0 - return _type, {'element': element, 'isotope': isotope, 'mapping': mapping, 'charge': charge, 'is_radical': False, - 'x': 0., 'y': 0., 'z': 0., 'hydrogen': hydrogen, 'stereo': stereo} + return _type, {'element': element, 'isotope': isotope, 'parsed_mapping': mapping, 'charge': charge, + 'implicit_hydrogens': hydrogen, 'stereo': stereo} def _query_parse(token): @@ -372,8 +370,7 @@ def smiles_tokenize(smi): out = [] for token_type, token in tokens: if token_type in (0, 8): # simple atom - out.append((token_type, {'element': token, 'isotope': None, 'mapping': 0, 'charge': 0, 'is_radical': False, - 'x': 0., 'y': 0., 'z': 0., 'hydrogen': None, 'stereo': None})) + out.append((token_type, {'element': token})) elif token_type == 5: out.append(_atom_parse(token)) elif token_type == 10: diff --git a/chython/periodictable/base/element.py 
b/chython/periodictable/base/element.py index 943d1128..04deaba2 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -25,11 +25,14 @@ class Element(ABC): __slots__ = ('_isotope', '_charge', '_is_radical', '_x', '_y', '_implicit_hydrogens', - '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_xyz', + '_explicit_hydrogens', '_stereo', '_parsed_mapping', '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring') __class_cache__ = {} - def __init__(self, isotope: Optional[int] = None): + def __init__(self, isotope: Optional[int] = None, *, + charge: int = 0, is_radical: bool = False, x: float = 0, y: float = 0, + implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, + parsed_mapping: Optional[int] = None): """ Element object with specified isotope @@ -41,11 +44,12 @@ def __init__(self, isotope: Optional[int] = None): elif isotope is not None: raise TypeError('integer isotope number required') self._isotope = isotope - self._charge = 0 - self._is_radical = False - self._x = self._y = 0 - self._implicit_hydrogens = None - self._stereo = None + self._charge = charge + self._is_radical = is_radical + self._x, self._y = x, y + self._implicit_hydrogens = implicit_hydrogens + self._stereo = stereo + self._parsed_mapping = parsed_mapping self._explicit_hydrogens = 0 self._neighbors = 0 From e8a4986e65b0315fb8a84f77fe802753590d08f1 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 2 Nov 2024 20:09:07 +0100 Subject: [PATCH 09/51] some progress in stereo --- chython/algorithms/isomorphism.py | 2 +- chython/algorithms/stereo/graph.py | 96 ++++++++++++++------------- chython/algorithms/stereo/molecule.py | 18 ++--- chython/files/_convert.py | 8 +-- chython/files/_mapping.py | 8 +-- chython/files/daylight/smarts.py | 19 +++--- chython/files/daylight/smiles.py | 9 ++- chython/files/daylight/tokenize.py | 57 +++++----------- chython/files/libinchi/wrapper.py | 4 +- 
chython/periodictable/base/element.py | 25 ++++--- chython/periodictable/base/query.py | 55 +++++++++------ 11 files changed, 151 insertions(+), 150 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index e2d95da3..a40188a6 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -361,7 +361,7 @@ def _cython_compiled_query(self): else: if isinstance(a, ListElement): v1 = v2 = 0 - for n in a._numbers: + for n in a.atomic_numbers: if n > 56: if n > 116: # Ts, Og n = 116 diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index 6fe91b76..bb7e5ebb 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -70,13 +70,10 @@ def tetrahedrons(self: 'Container') -> Tuple[int, ...]: """ Carbon sp3 atoms numbers. """ - atoms = self._atoms - bonds = self._bonds - tetra = [] - for n, atom in atoms.items(): + for n, atom in self._atoms.items(): if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: - env = bonds[n] + env = self._bonds[n] if all(int(x) == 1 for x in env.values()): if sum(int(x) for x in env.values()) > 4: continue @@ -157,14 +154,15 @@ def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): """ if s is None: s = self._atoms[n].stereo + if s is None: + raise KeyError order = self._stereo_tetrahedrons[n] if len(order) == 3: if len(env) == 4: # hydrogen atom passed to env - atoms = self._atoms # hydrogen always last in order try: - order = (*order, next(x for x in env if atoms[x].atomic_number == 1)) # see translate scheme + order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme except StopIteration: raise KeyError elif len(env) != 3: # pyramid or tetrahedron expected @@ -187,21 +185,24 @@ def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): :param nm: neighbor of last atom :param s: if None, use existing sign else translate given to 
molecule """ + try: + n0, n1, n2, n3 = self._stereo_cis_trans[(n, m)] + except KeyError: + n0, n1, n2, n3 = self._stereo_cis_trans[(m, n)] + n, m = m, n # in alkenes sign not order depended + nn, nm = nm, nn + if s is None: - try: - s = self._cis_trans_stereo[(n, m)] - except KeyError: - s = self._cis_trans_stereo[(m, n)] - n, m = m, n # in alkenes sign not order depended - nn, nm = nm, nn + i, j = self._stereo_cis_trans_centers[n] + s = self._bonds[i][j].stereo + if s is None: + raise KeyError - atoms = self._atoms - n0, n1, n2, n3 = self._stereo_cis_trans[(n, m)] if nn == n0: # same start t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError @@ -209,23 +210,23 @@ def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: t0 = 2 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError @@ -246,15 +247,16 @@ def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): :param s: if None, use existing sign else translate given to molecule """ if s is None: - s = self._allenes_stereo[c] + s = self._atoms[c].stereo + if s is None: + raise KeyError - atoms = 
self._atoms n0, n1, n2, n3 = self._stereo_allenes[c] if nn == n0: # same start t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError @@ -262,23 +264,23 @@ def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: t0 = 2 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: t1 = 2 else: raise KeyError @@ -388,21 +390,25 @@ def _stereo_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[in """ Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends """ - return {(n, m): env for (n, *mid, m), env in self._stereo_cumulenes.items() if not len(mid) % 2} - - @cached_property - def _stereo_cis_trans_paths(self) -> Dict[Tuple[int, int], Tuple[int, ...]]: - return {(path[0], path[-1]): path for path in self._stereo_cumulenes if not len(path) % 2} + stereo = {} + for path, env in self._stereo_cumulenes.items(): + if len(path) % 2: + continue + stereo[(path[0], path[-1])] = env + return stereo @cached_property - def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: """ - Cis-Trans 
terminal atoms to cis-trans key mapping + Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. """ terminals = {} - for nm in self._stereo_cis_trans_paths: - n, m = nm - terminals[n] = terminals[m] = nm + for path in self._stereo_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = (path[i - 1], path[i]) return terminals @cached_property @@ -411,8 +417,10 @@ def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: Cis-Trans terminal atoms counterparts """ counterpart = {} - for nm in self._stereo_cis_trans_paths: - n, m = nm + for path in self._stereo_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] counterpart[n] = m counterpart[m] = n return counterpart @@ -439,11 +447,7 @@ def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: """ Allene center atom to terminals mapping """ - return {c: (path[0], path[-1]) for c, path in self._stereo_allenes_paths.items()} - - @cached_property - def _stereo_allenes_paths(self) -> Dict[int, Tuple[int, ...]]: - return {path[len(path) // 2]: path for path in self._stereo_cumulenes if len(path) % 2} + return {path[len(path) // 2]: (path[0], path[-1]) for path in self._stereo_cumulenes if len(path) % 2} __all__ = ['Stereo'] diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo/molecule.py index 016df003..7c443a0b 100644 --- a/chython/algorithms/stereo/molecule.py +++ b/chython/algorithms/stereo/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -434,11 +434,9 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): return solved def __wedge_sign(self: 'MoleculeContainer', order): - plane = self._plane - if order[-1]: # allene s = self._translate_allene_sign(order[-2], *order[:2]) - v = _allene_sign(1, plane[order[2]], plane[order[3]], plane[order[1]]) + v = _allene_sign(1, self._atoms[order[2]].xy, self._atoms[order[3]].xy, self._atoms[order[1]].xy) if not v: logger.info(f'need 2d clean. allenes wedge stereo ambiguous for atom {order[-2]}') if s: @@ -450,11 +448,15 @@ def __wedge_sign(self: 'MoleculeContainer', order): s = self._translate_tetrahedron_sign(n, order[:-2]) # need recalculation if XY changed if len(order) == 5: - v = _pyramid_sign((*plane[n], 0), - (*plane[order[0]], 1), (*plane[order[1]], 0), (*plane[order[2]], 0)) + v = _pyramid_sign((*self._atoms[n].xy, 0), + (*self._atoms[order[0]].xy, 1), + (*self._atoms[order[1]].xy, 0), + (*self._atoms[order[2]].xy, 0)) else: - v = _pyramid_sign((*plane[order[3]], 0), - (*plane[order[0]], 1), (*plane[order[1]], 0), (*plane[order[2]], 0)) + v = _pyramid_sign((*self._atoms[order[3]].xy, 0), + (*self._atoms[order[0]].xy, 1), + (*self._atoms[order[1]].xy, 0), + (*self._atoms[order[2]].xy, 0)) if not v: logger.info(f'need 2d clean. 
tetrahedron wedge stereo ambiguous for atom {n}') if s: diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 819389e1..a450146e 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -30,16 +30,14 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False bonds = g._bonds mapping = data['mapping'] for n, atom in enumerate(data['atoms']): - if abs(atom['charge']) > 4: - raise ValueError('formal charge should be in range [-4, 4]') n = mapping[n] e = Element.from_symbol(atom.pop('element')) try: atoms[n] = e(**atom) - except ValueError: + except (ValueError, TypeError): if not ignore_bad_isotopes: raise - del atom['isotope'] # reset isotope mark on errors. + del atom['isotope'] # reset isotope mark on errors and try again. atoms[n] = e(**atom) bonds[n] = {} @@ -52,7 +50,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if n in bonds[m]: raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) - if any(a['z'] for a in data['atoms']): + if any(a.get('z') for a in data['atoms']): # store conformer g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] diff --git a/chython/files/_mapping.py b/chython/files/_mapping.py index e8d5915c..331eaa3e 100644 --- a/chython/files/_mapping.py +++ b/chython/files/_mapping.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -24,10 +24,10 @@ def postprocess_parsed_molecule(data, *, remap=False, ignore=True): if remap: remapped = list(range(1, len(data['atoms']) + 1)) else: - length = count(max(x['mapping'] for x in data['atoms']) + 1) + length = count(max(x.get('parsed_mapping') or 0 for x in data['atoms']) + 1) remapped, used = [], set() for n, atom in enumerate(data['atoms']): - m = atom['mapping'] + m = atom.get('parsed_mapping') if not m: remapped.append(next(length)) elif m in used: @@ -47,7 +47,7 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): for molecule in data[i]: used = set() for atom in molecule['atoms']: - m = atom['mapping'] + m = atom.get('parsed_mapping') if m: if m in used: if not ignore: diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 2885b8a2..4f095e03 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -21,7 +21,7 @@ from .parser import parser from .tokenize import smarts_tokenize from ...containers import QueryContainer -from ...periodictable import QueryElement +from ...periodictable import ListElement, QueryElement cx_radicals = compile(r'\^[1-7]:[0-9]+(?:,[0-9]+)*') @@ -104,16 +104,17 @@ def smarts(data: str): g = QueryContainer() mapping = {} - free = count(max(a['mapping'] for a in data['atoms']) + 1) + free = count(max(a.get('parsed_mapping', 0) for a in data['atoms']) + 1) for i, a in enumerate(data['atoms']): - mapping[i] = n = a.pop('mapping') or next(global_free_masked if a['masked'] else free) + mapping[i] = n = a.pop('parsed_mapping', 0) or next(global_free_masked if a.get('masked') else free) e = a.pop('element') - if it := a.pop('isotope'): - if isinstance(e, int): - e = QueryElement.from_atomic_number(e)(it) - else: - e = QueryElement.from_symbol(e)(it) - g.add_atom(e, n, **a) + if isinstance(e, int): + e = QueryElement.from_atomic_number(e) + elif isinstance(e, str): + e = 
QueryElement.from_symbol(e) + else: + e = ListElement(e) + g.add_atom(e(**a), n) for n, m, b in data['bonds']: g.add_bond(mapping[n], mapping[m], b) diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index d491c866..82687724 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -171,14 +171,14 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo: return - stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a['stereo']) is not None] + stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a.get('stereo')) is not None] if not stereo_atoms and not data['stereo_bonds']: return st = molecule._stereo_tetrahedrons sa = molecule._stereo_allenes sat = molecule._stereo_allenes_terminals - ctt = molecule._stereo_cis_trans_terminals + ctc = molecule._stereo_cis_trans_counterpart order = {mapping[n]: [mapping[m] for m in ms] for n, ms in data['order'].items()} @@ -203,9 +203,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): for n, ns in stereo_bonds.items(): if n in seen: continue - if n in ctt: - nm = ctt[n] - m = nm[1] if nm[0] == n else nm[0] + if n in ctc: + m = ctc[n] if m in stereo_bonds: seen.add(m) n2, s2 = stereo_bonds[m].popitem() diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index 6bf1eb8c..e8f3c7e6 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -17,10 +17,8 @@ # along with this program; if not, see . 
# from re import compile, match, search -from .._mdl import common_isotopes from ...containers.bonds import QueryBond from ...exceptions import IncorrectSmiles, IncorrectSmarts -from ...periodictable.element import ListElement # -,= OR bonds supported @@ -49,7 +47,6 @@ # 12: in ring bond -atomic_numbers = dict(enumerate(common_isotopes, 1)) iso_re = compile(r'^[0-9]+') chg_re = compile(r'[+-][1-4+-]?') mpp_re = compile(r':[1-9][0-9]*$') @@ -286,19 +283,18 @@ def _atom_parse(token): def _query_parse(token): + out = {} if isotope := match(iso_re, token): token = token[isotope.end():] # remove isotope substring - isotope = int(isotope.group()) + out['isotope'] = int(isotope.group()) if charge := search(chg_re, token): token = token[:charge.start()] + token[charge.end():] # remove charge substring - charge = charge_dict[charge.group()] - else: - charge = 0 + out['charge'] = charge_dict[charge.group()] + if mapping := search(mpp_re, token): token = token[:mapping.start()] - mapping = int(mapping.group()[1:]) - else: - mapping = 0 + out['parsed_mapping'] = int(mapping.group()[1:]) + if stereo := search(str_re, token): # drop stereo mark. 
unsupported token = token[:stereo.start()] + token[stereo.end():] @@ -308,35 +304,21 @@ def _query_parse(token): element = [int(x[1:]) if x.startswith('#') else x for x in element.split(',')] if len(element) == 1: element = element[0] - else: # only atoms supported - tmp = [] - for x in element: - if isinstance(x, int): - try: - tmp.append(atomic_numbers[x]) - except KeyError as e: - raise IncorrectSmiles('Invalid atomic number') from e - elif x in common_isotopes: - tmp.append(x) - else: - raise IncorrectSmarts('Invalid element symbol') - element = ListElement(tmp) else: raise IncorrectSmarts('Empty element') + out['element'] = element - hybridization = rings_sizes = neighbors = hydrogens = heteroatoms = None - masked = False for p in primitives[1:]: # parse hydrogens (h), neighbors (D), rings_sizes (r or !R), hybridization == 4 (a) if not p: continue elif p == 'a': # aromatic atom - hybridization = 4 + out['hybridization'] = 4 elif p == 'A': # ignore aliphatic mark. Ad-Hoc for Marwin. 
continue elif p == '!R': - rings_sizes = 0 + out['ring_sizes'] = 0 elif p == 'M': - masked = True + out['masked'] = True else: p = p.split(',') if len(p) != 1 and len({x[0] for x in p}) > 1: @@ -350,19 +332,16 @@ def _query_parse(token): raise IncorrectSmarts('Unsupported SMARTS primitive') if t == 'D': - neighbors = p + out['neighbors'] = p elif t == 'h': - hydrogens = p + out['implicit_hydrogens'] = p elif t == 'r': # r - rings_sizes = p + out['ring_sizes'] = p elif t == 'x': - heteroatoms = p + out['heteroatoms'] = p else: # z - hybridization = p - - return 0, {'element': element, 'isotope': isotope, 'mapping': mapping, 'charge': charge, 'is_radical': False, - 'heteroatoms': heteroatoms, 'hydrogens': hydrogens, 'neighbors': neighbors, - 'rings_sizes': rings_sizes, 'hybridization': hybridization, 'masked': masked} + out['hybridization'] = p + return 0, out def smiles_tokenize(smi): @@ -385,9 +364,7 @@ def smarts_tokenize(smi): out = [] for token_type, token in tokens: if token_type in (0, 8): # simple atom - out.append((0, {'element': token, 'isotope': None, 'mapping': 0, 'charge': 0, 'is_radical': False, - 'heteroatoms': None, 'hydrogens': None, 'neighbors': None, - 'rings_sizes': None, 'hybridization': None, 'masked': False})) + out.append((0, {'element': token})) elif token_type == 5: out.append(_query_parse(token)) else: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 0fb7daf3..55749f34 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -138,7 +138,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): st = molecule._stereo_tetrahedrons sa = molecule._stereo_allenes - ctt = molecule._stereo_cis_trans_terminals + ctc = molecule._stereo_cis_trans_counterpart stereo = [] for n, ngb, s in data['stereo_atoms']: @@ -151,7 +151,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): stereo.append((molecule.add_atom_stereo, n, nn + 1, mn + 1, s)) for n, m, 
nn, nm, s in data['stereo_cumulenes']: n += 1 - if n in ctt: + if n in ctc: stereo.append((molecule.add_cis_trans_stereo, n, m + 1, nn + 1, nm + 1, s)) while stereo: diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 04deaba2..56e9f3d3 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -30,7 +30,7 @@ class Element(ABC): __class_cache__ = {} def __init__(self, isotope: Optional[int] = None, *, - charge: int = 0, is_radical: bool = False, x: float = 0, y: float = 0, + charge: int = 0, is_radical: bool = False, x: float = 0., y: float = 0., implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, parsed_mapping: Optional[int] = None): """ @@ -38,15 +38,11 @@ def __init__(self, isotope: Optional[int] = None, *, :param isotope: Isotope number of element """ - if isinstance(isotope, int): - if isotope not in self.isotopes_distribution: - raise ValueError(f'isotope number {isotope} impossible or not stable for {self.atomic_symbol}') - elif isotope is not None: - raise TypeError('integer isotope number required') - self._isotope = isotope - self._charge = charge - self._is_radical = is_radical - self._x, self._y = x, y + self.isotope = isotope + self.charge = charge + self.is_radical = is_radical + self.x, self.y = x, y + self._implicit_hydrogens = implicit_hydrogens self._stereo = stereo self._parsed_mapping = parsed_mapping @@ -81,6 +77,15 @@ def isotope(self) -> Optional[int]: """ return self._isotope + @isotope.setter + def isotope(self, value: Optional[int]): + if isinstance(value, int): + if value not in self.isotopes_distribution: + raise ValueError(f'isotope number {value} impossible or not stable for {self.atomic_symbol}') + elif value is not None: + raise TypeError('integer isotope number required') + self._isotope = value + @property def atomic_mass(self) -> float: mass = self.isotopes_masses diff --git a/chython/periodictable/base/query.py 
b/chython/periodictable/base/query.py index 325c0947..2089bc17 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -49,10 +49,11 @@ def _validate(value, prop): class Query(ABC): __slots__ = ('_neighbors', '_hybridization', '_masked') - def __init__(self): - self._neighbors = () - self._hybridization = () - self._masked = False + def __init__(self, neighbors: Union[int, Tuple[int, ...], None] = None, + hybridization: Union[int, Tuple[int, ...], None] = None, masked: bool = False): + self.neighbors = neighbors + self.hybridization = hybridization + self.masked = masked @property @abstractmethod @@ -118,14 +119,16 @@ def __repr__(self): class ExtendedQuery(Query, ABC): __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens', '_stereo') - def __init__(self): - super().__init__() - self._charge = 0 - self._is_radical = False - self._heteroatoms = () - self._ring_sizes = () - self._implicit_hydrogens = () - self._stereo = None + def __init__(self, charge: int = 0, is_radical: bool = False, heteroatoms: Union[int, Tuple[int, ...], None] = None, + ring_sizes: Union[int, Tuple[int, ...], None] = None, + implicit_hydrogens: Union[int, Tuple[int, ...], None] = None, stereo: Optional[bool] = None, **kwargs): + super().__init__(**kwargs) + self.charge = charge + self.is_radical = is_radical + self.heteroatoms = heteroatoms + self.ring_sizes = ring_sizes + self.implicit_hydrogens = implicit_hydrogens + self._stereo = stereo @property def charge(self) -> int: @@ -292,14 +295,22 @@ def __hash__(self): class ListElement(ExtendedQuery): __slots__ = ('_elements', '__dict__') - def __init__(self, elements: List[str]): + def __init__(self, elements: List[str], **kwargs): """ Elements list """ if not isinstance(elements, (list, tuple)) or not elements: raise ValueError('invalid elements list') - super().__init__() - self._elements = tuple(elements) + tmp = [] + for x in elements: + if isinstance(x, int): + 
tmp.append(Element.from_atomic_number(x).__name__) + elif isinstance(x, str): + tmp.append(Element.from_symbol(x).__name__) + else: + raise ValueError(f'invalid element: {x}') + super().__init__(**kwargs) + self._elements = tuple(tmp) @property def atomic_symbol(self) -> str: @@ -366,11 +377,9 @@ def __repr__(self): class QueryElement(ExtendedQuery, ABC): __slots__ = ('_isotope',) - def __init__(self, isotope: Optional[int] = None): - if isotope is not None and not isinstance(isotope, int): - raise TypeError('isotope must be an int') - super().__init__() - self._isotope = isotope + def __init__(self, isotope: Optional[int] = None, **kwargs): + super().__init__(**kwargs) + self.isotope = isotope def __repr__(self): if self.isotope: @@ -392,6 +401,12 @@ def atomic_number(self) -> int: def isotope(self): return self._isotope + @isotope.setter + def isotope(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise TypeError('isotope must be an int') + self._isotope = value + @classmethod def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: """ From 0252529b90ffb3d3ce45f3bdc3dae4d8e0ad5265 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 2 Nov 2024 20:15:35 +0100 Subject: [PATCH 10/51] revert --- chython/algorithms/stereo/graph.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index bb7e5ebb..8ad032fd 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -411,6 +411,19 @@ def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: terminals[n] = terminals[m] = (path[i - 1], path[i]) return terminals + @cached_property + def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal atoms to terminal pair mapping. 
+ """ + terminals = {} + for path in self._stereo_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + terminals[n] = terminals[m] = (n, m) + return terminals + @cached_property def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: """ From 932a30f8a0a422e4fe7904780e2037a10941f26f Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 3 Nov 2024 14:36:38 +0100 Subject: [PATCH 11/51] another portion of fixes. --- chython/algorithms/aromatics/kekule.py | 4 +- chython/algorithms/aromatics/thiele.py | 34 +++++----- chython/algorithms/calculate2d/__init__.py | 70 +++++++++---------- chython/algorithms/depict.py | 69 +++++++++---------- chython/algorithms/smiles.py | 25 +++---- chython/algorithms/standardize/molecule.py | 4 +- chython/algorithms/standardize/resonance.py | 2 +- chython/algorithms/stereo/graph.py | 5 +- chython/algorithms/stereo/molecule.py | 74 +++++++++++++-------- chython/containers/molecule.py | 52 ++++++++++++++- chython/files/MRVrw.py | 12 ++-- chython/files/RDFrw.py | 18 ++--- chython/files/SDFrw.py | 10 +-- chython/files/_convert.py | 13 ++-- chython/files/_mdl/stereo.py | 39 ++--------- chython/files/daylight/smarts.py | 3 +- chython/files/xyz.py | 19 ++---- chython/periodictable/base/element.py | 13 ---- chython/reactor/base.py | 4 +- 19 files changed, 240 insertions(+), 230 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index de51744b..f1df888c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -50,7 +50,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool atoms.add(n) atoms.add(m) for n in atoms: - self._calc_implicit(n) + self.calc_implicit(n) self.flush_cache() return True return fixed @@ -69,7 +69,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): atoms.add(n) atoms.add(m) for n in atoms: - copy._calc_implicit(n) + copy.calc_implicit(n) yield copy def 
__fix_rings(self: 'MoleculeContainer'): diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 43030a86..0b2ce586 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -56,9 +56,6 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: atoms = self._atoms bonds = self._bonds nsc = self.not_special_connectivity - sh = self.hybridization - charges = self._charges - hydrogens = self._hydrogens rings = defaultdict(set) # aromatic? skeleton. include quinones tetracycles = [] @@ -73,13 +70,13 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: # only B C N O P S with 2-3 neighbors. detects this: C1=CC=CP12=CC=CC=C2 if any(atoms[n].atomic_number not in (6, 7, 8, 16, 5, 15) or len(nsc[n]) > 3 for n in ring): continue - sp2 = sum(sh(n) == 2 for n in ring) + sp2 = sum(atoms[n].hybridization == 2 for n in ring) if sp2 == lr: # benzene like if lr == 4: # two bonds condensed aromatic rings tetracycles.append(ring) else: if fix_tautomers and lr % 2: # find potential pyrroles - acceptors.update(n for n in ring if atoms[n].atomic_number == 7 and not charges[n]) + acceptors.update(n for n in ring if (a := atoms[n]).atomic_number == 7 and not a.charge) n, *_, m = ring rings[n].add(m) rings[m].add(n) @@ -88,11 +85,12 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: rings[m].add(n) elif 4 < lr == sp2 + 1: # pyrroles, furanes, etc try: - n = next(n for n in ring if sh(n) == 1) + n = next(n for n in ring if atoms[n].hybridization == 1) except StopIteration: # exotic, just skip continue - an = atoms[n].atomic_number - if (c := charges[n]) == -1: + a = atoms[n] + an = a.atomic_number + if (c := a.charge) == -1: if an != 6 or 
lr != 5: # skip any but ferrocene continue elif c: # skip any charged @@ -149,8 +147,8 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: acceptors.discard(current) pyrroles.discard(start) pyrroles.add(current) - hydrogens[current] = 1 - hydrogens[start] = 0 + atoms[current]._implicit_hydrogens = 1 + atoms[start]._implicit_hydrogens = 0 break else: continue @@ -163,7 +161,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: else: # path not found continue for n, m, o in path: - bonds[n][m]._Bond__order = o # noqa + bonds[n][m]._order = o if not acceptors: break @@ -205,24 +203,24 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: for ring in tetracycles: if seen.issuperset(ring): n, *_, m = ring - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 for ring in rings: n, *_, m = ring - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 self.flush_cache() for ring in freaks: # aromatize rule based for q in freak_rules: if next(q.get_mapping(self, searching_scope=ring, automorphism_filter=False), None): n, *_, m = ring - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 break if freaks: self.flush_cache() # flush again diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index bef7b1f0..a787abc5 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -77,9 +77,11 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): else: bond_reduce = 1. 
- self_plane = self._plane + atoms = self._atoms for n, (x, y) in plane.items(): - self_plane[n] = (x / bond_reduce, y / bond_reduce) + a = atoms[n] + a._x = x / bond_reduce + a._y = y / bond_reduce if self.connected_components_count > 1: shift_x = 0. @@ -88,27 +90,28 @@ def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): self.__dict__.pop('__cached_method__repr_svg_', None) def _fix_plane_mean(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - plane = self._plane + atoms = self._atoms if component is None: - component = plane + component = atoms - left_atom = min(component, key=lambda x: plane[x][0]) - right_atom = max(component, key=lambda x: plane[x][0]) + left_atom = atoms[min(component, key=lambda x: atoms[x].x)] + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] - min_x = plane[left_atom][0] - shift_x - if len(self._atoms[left_atom].atomic_symbol) == 2: + min_x = left_atom.x - shift_x + if len(left_atom.atomic_symbol) == 2: min_x -= .2 - max_x = plane[right_atom][0] - min_x - min_y = min(plane[x][1] for x in component) - max_y = max(plane[x][1] for x in component) + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) + max_y = max(atoms[x].y for x in component) mean_y = (max_y + min_y) / 2 - shift_y for n in component: - x, y = plane[n] - plane[n] = (x - min_x, y - mean_y) + a = atoms[n] + a._x -= min_x + a._y -= mean_y - if -.18 <= plane[right_atom][1] <= .18: - factor = self._hydrogens[right_atom] + if -.18 <= right_atom.y <= .18: + factor = right_atom.implicit_hydrogens if factor == 1: max_x += .15 elif factor: @@ -116,21 +119,22 @@ def _fix_plane_mean(self: 'MoleculeContainer', shift_x: float, shift_y=0., compo return max_x def _fix_plane_min(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - plane = self._plane + atoms = self._atoms if component is None: - component = plane + component = atoms - right_atom = max(component, key=lambda x: 
plane[x][0]) - min_x = min(plane[x][0] for x in component) - shift_x - max_x = plane[right_atom][0] - min_x - min_y = min(plane[x][1] for x in component) - shift_y + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] + min_x = min(atoms[x].x for x in component) - shift_x + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) - shift_y for n in component: - x, y = plane[n] - plane[n] = (x - min_x, y - min_y) + a = atoms[n] + a._x -= min_x + a._y -= min_y - if shift_y - .18 <= plane[right_atom][1] <= shift_y + .18: - factor = self._hydrogens[right_atom] + if shift_y - .18 <= right_atom.y <= shift_y + .18: + factor = right_atom.implicit_hydrogens if factor == 1: max_x += .15 elif factor: @@ -138,21 +142,9 @@ def _fix_plane_min(self: 'MoleculeContainer', shift_x: float, shift_y=0., compon return max_x def __clean2d_prepare(self: 'MoleculeContainer', entry): - hydrogens = self._hydrogens - charges = self._charges - allenes_stereo = self._allenes_stereo - atoms_stereo = self._atoms_stereo - self._charges = self._hydrogens = {n: 0 for n in hydrogens} - self._atoms_stereo = self._allenes_stereo = {} - w = {n: random() for n in hydrogens} + w = {n: random() for n in self._atoms} w[entry] = -1 - try: - smiles, order = self._smiles(w.__getitem__, random=True, _return_order=True) - finally: - self._hydrogens = hydrogens - self._charges = charges - self._allenes_stereo = allenes_stereo - self._atoms_stereo = atoms_stereo + smiles, order = self._smiles(w.__getitem__, random=True, charges=False, stereo=False, _return_order=True) return ''.join(smiles).replace('~', '-'), order diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 4eab3f82..1189d32a 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2022 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # Copyright 2019-2020 Dinar Batyrshin # This file is part of chython. 
# @@ -206,17 +206,17 @@ def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, he :param clean2d: calculate coordinates if necessary. """ uid = str(uuid4()) - values = self._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + atoms = self._atoms.values() + min_x = min(a.x for a in atoms) + max_x = max(a.x for a in atoms) + min_y = min(a.y for a in atoms) + max_y = max(a.y for a in atoms) if clean2d and len(self) > 1 and max_y - min_y < .01 and max_x - min_x < 0.01: self.clean2d() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for a in atoms) + max_x = max(a.x for a in atoms) + min_y = min(a.y for a in atoms) + max_y = max(a.y for a in atoms) bonds = self.__render_bonds() atoms, define, masks = self.__render_atoms(uid) @@ -247,8 +247,8 @@ def _repr_svg_(self): return self.depict() def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): + atoms = self._atoms svg = [] - plane = self._plane double_space = _render_config['double_space'] triple_space = _render_config['triple_space'] wedge_space = _render_config['wedge_space'] @@ -260,8 +260,8 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): wedge[n].add(m) wedge[m].add(n) - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy ny, my = -ny, -my dx, dy = _rotate_vector(0, wedge_space, mx - nx, ny - my) @@ -272,8 +272,8 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): if m in wedge[n]: continue order = bond.order - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy ny, my = -ny, -my if order in (1, 4): svg.append(f' ') @@ -291,18 +291,18 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): f'stroke-dasharray="{dash1:.2f} 
{dash2:.2f}"/>') for ring in self.aromatic_rings: - cx = sum(plane[n][0] for n in ring) / len(ring) - cy = sum(plane[n][1] for n in ring) / len(ring) + cx = sum(atoms[n].x for n in ring) / len(ring) + cy = sum(atoms[n].y for n in ring) / len(ring) for n, m in zip(ring, ring[1:]): - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy aromatic = _render_aromatic_bond(nx, ny, mx, my, cx, cy) if aromatic: svg.append(aromatic) - nx, ny = plane[ring[-1]] - mx, my = plane[ring[0]] + nx, ny = atoms[ring[-1]].xy + mx, my = atoms[ring[0]].xy aromatic = _render_aromatic_bond(nx, ny, mx, my, cx, cy) if aromatic: svg.append(aromatic) @@ -310,10 +310,6 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): def __render_atoms(self: 'MoleculeContainer', uid): bonds = self._bonds - plane = self._plane - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens carbon = _render_config['carbon'] mapping = _render_config['mapping'] @@ -360,14 +356,13 @@ def __render_atoms(self: 'MoleculeContainer', uid): mask = [] for n, atom in self._atoms.items(): - x, y = plane[n] - y = -y + x, y = atom.x, -atom.y symbol = atom.atomic_symbol - if not bonds[n] or symbol != 'C' or carbon or charges[n] or radicals[n] or atom.isotope or n in cumulenes: - if charges[n]: + if not bonds[n] or symbol != 'C' or carbon or atom.charge or atom.is_radical or atom.isotope or n in cumulenes: + if atom.charge: others.append(f' ' - f'{_render_charge[charges[n]]}{"↑" if radicals[n] else ""}') - elif radicals[n]: + f'{_render_charge[atom.charge]}{"↑" if atom.is_radical else ""}') + elif atom.is_radical: others.append(f' ↑') if atom.isotope: others.append(f' ') - h = hydrogens[n] + h = atom.implicit_hydrogens if h == 1: h = 'H' elif h: @@ -463,11 +458,11 @@ def depict(self: 'ReactionContainer', *, width=None, height=None, clean2d: bool if clean2d: for m in self.molecules(): if len(m) > 1: - values = m._plane.values() # noqa - min_x = min(x for 
x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + atoms = m._atoms.values() + min_x = min(a.x for a in atoms) + max_x = max(a.x for a in atoms) + min_y = min(a.y for a in atoms) + max_y = max(a.y for a in atoms) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() self.fix_positions() diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 412c76e0..b400a259 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -476,19 +476,20 @@ def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): return '~' def __ct_map(self, adjacency): + stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} + if not stereo_bonds: + return {} ct_map = {} - cts = self._cis_trans_stereo - if not cts: - return ct_map + ctc = self._stereo_cis_trans_centers ctt = self._stereo_cis_trans_terminals sct = self._stereo_cis_trans - ctc = self._stereo_cis_trans_counterpart + ctcp = self._stereo_cis_trans_counterpart seen = set() for k, vs in adjacency.items(): seen.add(k) - if (ts := ctt.get(k)) and ts in cts: - env = sct[ts] + if (cs := ctc.get(k)) and stereo_bonds.issuperset(cs): + env = sct[ctt[k]] for v in vs: if v in env: if (k, v) in ct_map: @@ -497,11 +498,11 @@ def __ct_map(self, adjacency): s = ct_map[(k, x)] ct_map[(k, v)] = not s # X/C(/R)=, C(\X)(/R)=, C(=C(\X)/R)=C= ct_map[(v, k)] = s - if y := ctt.get(v): # =C(\X)/R=, C(\X)(/R=)= + if y := ctc.get(v): # =C(\X)/R=, C(\X)(/R=)= ct_map[v] = k seen.add(y) - elif ts in seen: - o = ctc[k] + elif cs in seen: + o = ctcp[k] on = ct_map[o] s = ct_map[(o, on)] if not self._translate_cis_trans_sign(k, o, v, on): @@ -509,17 +510,17 @@ def __ct_map(self, adjacency): ct_map[(k, v)] = s ct_map[k] = v ct_map[(v, k)] = not s # C/R=, R\1...C/1 - if y := ctt.get(v): + if y := ctc.get(v): ct_map[v] = k seen.add(y) else: # left entry to double bond - if y := 
ctt.get(v): # 1,3-diene case + if y := ctc.get(v): # 1,3-diene case ct_map[v] = k seen.add(y) ct_map[(v, k)] = True # R/C=, C\1=...R/1, C(/R=)=, C(=C(/R=))=C= ct_map[(k, v)] = False # first DOWN ct_map[k] = v - seen.add(ts) + seen.add(cs) return ct_map diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 89bf57f5..c9fb0893 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -426,7 +426,7 @@ def clean_isotopes(self: 'MoleculeContainer') -> bool: isotopes = [x for x in atoms.values() if x.isotope] if isotopes: for i in isotopes: - i._Core__isotope = None + i._isotope = None self.flush_cache() self.fix_stereo() return True @@ -436,7 +436,7 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): bonds = self._bonds charges = self._charges radicals = self._radicals - calc_implicit = self._calc_implicit + calc_implicit = self.calc_implicit log = [] fixed = set() diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 1270e3dd..696b977c 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -38,7 +38,7 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals charges = self._charges radicals = self._radicals bonds = self._bonds - calc_implicit = self._calc_implicit + calc_implicit = self.calc_implicit entries, exits, rads, constrains, nitrogen_cat, nitrogen_ani, sulfur_cat = self.__entries() hs = set() while len(rads) > 1: diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py index 8ad032fd..59523deb 100644 --- a/chython/algorithms/stereo/graph.py +++ b/chython/algorithms/stereo/graph.py @@ -414,14 +414,15 @@ def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: @cached_property def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: """ - Cis-Trans terminal 
atoms to terminal pair mapping. + Cis-Trans terminal and central atoms to terminal pair mapping. """ terminals = {} for path in self._stereo_cumulenes: if len(path) % 2: continue n, m = path[0], path[-1] - terminals[n] = terminals[m] = (n, m) + i = len(path) // 2 + terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) return terminals @cached_property diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo/molecule.py index 7c443a0b..9415d551 100644 --- a/chython/algorithms/stereo/molecule.py +++ b/chython/algorithms/stereo/molecule.py @@ -204,19 +204,19 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): """ Calculate cis-trans stereo bonds from given 2d coordinates. Unusable for SMILES and INCHI. """ - cis_trans_stereo = self._cis_trans_stereo - plane = self._plane + atoms = self._atoms flag = False while self._chiral_cis_trans: - stereo = {} + stereo = False for nm in self._chiral_cis_trans: n, m = nm n1, m1, *_ = self._stereo_cis_trans[nm] - s = _cis_trans_sign(plane[n1], plane[n], plane[m], plane[m1]) + s = _cis_trans_sign(atoms[n1].xy, atoms[n].xy, atoms[m].xy, atoms[m1].xy) if s: - stereo[nm] = s > 0 + stereo = True + i, j = self._stereo_cis_trans_centers[n] + self._bonds[i][j]._stereo = s > 0 if stereo: - cis_trans_stereo.update(stereo) flag = True self.flush_stereo_cache() else: @@ -234,19 +234,21 @@ def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mar See and """ - if n not in self._atoms: + try: + atom = self._atoms[n] + except KeyError: raise AtomNotFound - if n in self._atoms_stereo or n in self._allenes_stereo: + if atom.stereo is not None: raise IsChiral if not isinstance(mark, bool): raise TypeError('stereo mark should be bool') if n in self._chiral_tetrahedrons: - self._atoms_stereo[n] = self._translate_tetrahedron_sign(n, env, mark) + atom._stereo = self._translate_tetrahedron_sign(n, env, mark) if clean_cache: self.flush_cache() elif n 
in self._chiral_allenes: - self._allenes_stereo[n] = self._translate_allene_sign(n, *env, mark) + atom._stereo = self._translate_allene_sign(n, *env, mark) if clean_cache: self.flush_cache() else: # only tetrahedrons supported @@ -272,15 +274,19 @@ def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: raise AtomNotFound if not isinstance(mark, bool): raise TypeError('stereo mark should be bool') - if (n, m) in self._cis_trans_stereo or (m, n) in self._cis_trans_stereo: + + if n not in self._stereo_cis_trans_counterpart or self._stereo_cis_trans_counterpart[n] != m: + raise NotChiral + i, j = self._stereo_cis_trans_centers[n] + if self._bonds[i][j].stereo is not None: raise IsChiral if (n, m) in self._chiral_cis_trans: - self._cis_trans_stereo[(n, m)] = self._translate_cis_trans_sign(n, m, n1, n2, mark) + self._bonds[i][j] = self._translate_cis_trans_sign(n, m, n1, n2, mark) if clean_cache: self.flush_cache() elif (m, n) in self._chiral_cis_trans: - self._cis_trans_stereo[(m, n)] = self._translate_cis_trans_sign(m, n, n2, n1, mark) + self._bonds[i][j] = self._translate_cis_trans_sign(m, n, n2, n1, mark) if clean_cache: self.flush_cache() else: @@ -372,7 +378,7 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): if env[3]: orders.append((env[3], env[0], *term[::-1], n, True)) space.append(orders) - for n, s in self._atoms_stereo.items(): + for n, s in atoms_stereo.items(): order = list(self._stereo_tetrahedrons[n]) orders = [(*order, n, False)] for _ in range(1, len(order)): @@ -478,12 +484,18 @@ def _chiral_allenes(self) -> Set[int]: @cached_property def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: - if not self._atoms_stereo and not self._allenes_stereo and not self._cis_trans_stereo: + stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} + if not 
stereo_atoms and not stereo_bonds: return self.atoms_order + morgan = self.atoms_order.copy() - atoms_stereo = set(self._atoms_stereo) - cis_trans_stereo = set(self._cis_trans_stereo) - allenes_stereo = set(self._allenes_stereo) + atoms_stereo = stereo_atoms.intersection(self.tetrahedrons) + allenes_stereo = stereo_atoms - atoms_stereo + + cis_trans_terminals = self._stereo_cis_trans_terminals + cis_trans_stereo = {cis_trans_terminals[n] for n in stereo_bonds} + while True: # try iteratively differentiate stereo atoms. morgan, atoms_stereo, cis_trans_stereo, allenes_stereo, atoms_groups, cis_trans_groups, allenes_groups = \ @@ -599,6 +611,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): cis_trans = self._stereo_cis_trans allenes_centers = self._stereo_allenes_centers cis_trans_terminals = self._stereo_cis_trans_terminals + cis_trans_centers = self._stereo_cis_trans_centers morgan = self._chiral_morgan # find new chiral atoms and bonds. @@ -623,20 +636,22 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): if len(path) % 2: chiral_a.add(path[len(path) // 2]) else: - chiral_c.add((n, m)) + chiral_c.add(n) stereogenic.add(n) stereogenic.add(m) # ring cumulenes always chiral. can be already added. for nm in self._rings_cumulenes: n, m = nm if any(len(x) < 8 for x in atoms_rings[n]): # skip small rings. - if nm in chiral_c: # remove already added small rings cumulenes. - chiral_c.discard(nm) + if n in chiral_c: # remove already added small rings cumulenes. 
+ chiral_c.discard(n) + if m in chiral_c: + chiral_c.discard(m) elif n in allenes_centers and (c := allenes_centers[n]) in chiral_a: chiral_a.discard(c) continue elif nm in cis_trans: - chiral_c.add(nm) + chiral_c.add(n) else: chiral_a.add(allenes_centers[n]) pseudo[m] = n @@ -697,13 +712,18 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): elif n in allenes_centers: chiral_a.add(allenes_centers[n]) else: - chiral_c.add(cis_trans_terminals[n]) + chiral_c.add(n) # skip already marked. - chiral_t.difference_update(self._atoms_stereo) - chiral_a.difference_update(self._allenes_stereo) - chiral_c.difference_update(self._cis_trans_stereo) - return chiral_t, chiral_c, chiral_a + stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + chiral_t.difference_update(stereo_atoms) + chiral_a.difference_update(stereo_atoms) + diff = set() + for n in chiral_c: + i, j = cis_trans_centers[n] + if self._bonds[i][j].stereo is None: + diff.add(cis_trans_terminals[n]) + return chiral_t, diff, chiral_a def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan, atoms_stereo, cis_trans_stereo, allenes_stereo): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 09fa158a..c96fb713 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -693,7 +693,55 @@ def _augmented_substructure(self, atoms: Iterable[int], deep: int): nodes.append(n) return nodes - def _calc_implicit(self, n: int): + def fix_labels(self, recalculate_hydrogens=True): + """ + Fix molecule internal represenation + """ + if not self._changed: + return + + self.calc_labels() # refresh all labels + + if recalculate_hydrogens: + for n in self._changed: + self.calc_implicit(n) # fix Hs count + self._changed = None + + def calc_labels(self): + atoms = self._atoms + for n, m_bond in self._bonds.items(): + neighbors = 0 + heteroatoms = 0 + hybridization = 1 + explicit_hydrogens = 0 + for m, bond in 
m_bond.items(): + order = bond.order + if order == 8: + continue + elif order == 4: + hybridization = 4 + elif hybridization != 4: + if order == 3: + hybridization = 3 + elif order == 2: + if hybridization == 1: + hybridization = 2 + elif hybridization == 2: + hybridization = 3 + + neighbors += 1 + an = atoms[m].atomic_number + if an == 1: + explicit_hydrogens += 1 + elif an != 6: + heteroatoms += 1 + atom = atoms[n] + atom._neighbors = neighbors + atom._heteroatoms = heteroatoms + atom._hybridization = hybridization + atom._explicit_hydrogens = explicit_hydrogens + + def calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules """ @@ -746,7 +794,7 @@ def _calc_implicit(self, n: int): return atom._implicit_hydrogens = None # rule not found - def _check_implicit(self, n: int, h: int) -> bool: + def check_implicit(self, n: int, h: int) -> bool: atom = self._atoms[n] if atom.atomic_number == 1: # hydrogen nether has implicit H return h == 0 diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index c8db572a..0a589410 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -138,8 +138,8 @@ def read_structure(self, *, current: bool = True): postprocess_parsed_molecule(tmp, remap=self.__remap, ignore=self.__ignore) parse_sgroup(data, tmp) mol = create_molecule(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self.__ignore, ignore_stereo=self.__ignore_stereo, - calc_cis_trans=self.__calc_cis_trans) + if not self.__ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) mol.meta.update(meta) return mol elif 'reaction' in data and isinstance(data['reaction'], dict): @@ -171,9 +171,9 @@ def read_structure(self, *, current: bool = True): postprocess_parsed_reaction(tmp, remap=self.__remap, ignore=self.__ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=self.__ignore, ignore_stereo=self.__ignore_stereo, - calc_cis_trans=self.__calc_cis_trans) + if not self.__ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) rxn.meta.update(meta) return rxn else: diff --git a/chython/files/RDFrw.py b/chython/files/RDFrw.py index 0d4475bc..62bebbae 100644 --- a/chython/files/RDFrw.py +++ b/chython/files/RDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2019 Dinar Batyrshin # This file is part of chython. 
# @@ -74,9 +74,9 @@ def read_structure(self, *, current=True) -> Union[ReactionContainer, MoleculeCo postprocess_parsed_reaction(tmp, remap=self._remap, ignore=self._ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) if meta: rxn.meta.update(meta) return rxn @@ -87,8 +87,8 @@ def read_structure(self, *, current=True) -> Union[ReactionContainer, MoleculeCo postprocess_parsed_molecule(tmp) mol = create_molecule(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) if meta: mol.meta.update(meta) return mol @@ -289,9 +289,9 @@ def mdl_rxn(data: str, /, *, ignore=True, calc_cis_trans=False, ignore_stereo=Fa postprocess_parsed_reaction(tmp, remap=remap, ignore=ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=ignore_bad_isotopes, _m_cls=_m_cls, _r_cls=_r_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - calc_cis_trans=calc_cis_trans) + if not ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=calc_cis_trans) return rxn diff --git a/chython/files/SDFrw.py b/chython/files/SDFrw.py 
index 6ef8e638..04edb0ad 100644 --- a/chython/files/SDFrw.py +++ b/chython/files/SDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -71,8 +71,8 @@ def read_structure(self, *, current=True) -> MoleculeContainer: postprocess_parsed_molecule(tmp, remap=self._remap, ignore=self._ignore) mol = create_molecule(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) meta = self.read_metadata() if meta: mol.meta.update(meta) @@ -213,8 +213,8 @@ def mdl_mol(data: str, /, *, ignore=True, calc_cis_trans=False, ignore_stereo=Fa postprocess_parsed_molecule(tmp, remap=remap, ignore=ignore) mol = create_molecule(tmp, ignore_bad_isotopes=ignore_bad_isotopes, _cls=_cls) - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - calc_cis_trans=calc_cis_trans) + if not ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=calc_cis_trans) return mol diff --git a/chython/files/_convert.py b/chython/files/_convert.py index a450146e..6da1ffd6 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -50,6 +50,9 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if n in bonds[m]: raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) + + g.calc_labels() # set all labels except rings + if any(a.get('z') for a in data['atoms']): # store conformer g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] @@ -70,13 +73,13 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if a.implicit_hydrogens is None: # 
let's try to calculate. in case of errors just keep as is. radicals in smiles should be in [brackets], # thus has implicit Hs value - g._calc_implicit(n) + g.calc_implicit(n) elif keep_implicit: # keep given Hs count as is continue else: # recheck given Hs count h = a.implicit_hydrogens # parsed Hs - g._calc_implicit(n) # recalculate + g.calc_implicit(n) # recalculate if a.implicit_hydrogens is None: # atom has invalid valence or aromatic ring. if a.hybridization == 4: # this is aromatic ring. just restore given H count. @@ -91,7 +94,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif not keep_radicals and not a.is_radical: # CXSMILES radical not set. # SMILES doesn't code radicals. so, let's try to guess. a._is_radical = True - if g._check_implicit(n, h): # radical form is valid + if g.check_implicit(n, h): # radical form is valid radicalized.append(n) a._implicit_hydrogens = h elif ignore: # radical state also has errors. @@ -114,11 +117,11 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif g._check_implicit(n, h): # set another possible implicit state. probably Al, P + elif g.check_implicit(n, h): # set another possible implicit state. probably Al, P a._implicit_hydrogens = h elif not keep_radicals and not a.is_radical: # CXSMILES radical is not set. try radical form a._is_radical = True - if g._check_implicit(n, h): + if g.check_implicit(n, h): a._implicit_hydrogens = h radicalized.append(n) # radical state also has errors. 
diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py index 67dd52aa..ce9a651c 100644 --- a/chython/files/_mdl/stereo.py +++ b/chython/files/_mdl/stereo.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,47 +19,16 @@ from ...exceptions import NotChiral, IsChiral, ValenceError -def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, calc_cis_trans=False, - keep_implicit=False): +def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans=False): + if ignore_stereo: + return mapping = data['mapping'] - hydrogens = molecule._hydrogens - hyb = molecule.hybridization - implicit_mismatch = {} if 'chython_parsing_log' in molecule.meta: log = molecule.meta['chython_parsing_log'] else: log = [] - for n, h in data['hydrogens'].items(): - n = mapping[n] - if keep_implicit: # override any calculated hydrogens count. - hydrogens[n] = h - if (hc := hydrogens[n]) is None: # aromatic rings or valence errors - if hyb(n) == 4: # this is aromatic rings. just store given H count. - hydrogens[n] = h - elif hc != h: - if hyb(n) == 4: - if ignore: - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif molecule._check_implicit(n, h): # set another possible implicit state. 
probably Al, P - hydrogens[n] = h - elif ignore: # just ignore it - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - - if implicit_mismatch: - molecule.meta['chython_implicit_mismatch'] = implicit_mismatch - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log - if ignore_stereo: - return - if calc_cis_trans: molecule.calculate_cis_trans_from_2d() diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 4f095e03..3a409505 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # +from functools import partial from itertools import count from re import compile, findall, search from .parser import parser @@ -113,7 +114,7 @@ def smarts(data: str): elif isinstance(e, str): e = QueryElement.from_symbol(e) else: - e = ListElement(e) + e = partial(ListElement, e) g.add_atom(e(**a), n) for n, m, b in data['bonds']: diff --git a/chython/files/xyz.py b/chython/files/xyz.py index 42ec82e7..612415bc 100644 --- a/chython/files/xyz.py +++ b/chython/files/xyz.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -35,22 +35,17 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, atoms = mol._atoms bonds = mol._bonds - plane = mol._plane - hydrogens = mol._hydrogens - radicals = mol._radicals for n, (a, x, y, z) in enumerate(matrix, 1): atoms[n] = atom = Element.from_symbol(a)() - atom._attach_graph(mol, n) bonds[n] = {} - plane[n] = (x, y) + atom.x = x + atom.y = y + atom._implicit_hydrogens = 0 conformer[n] = (x, y, z) - hydrogens[n] = 0 # implicit hydrogens not supported. - radicals[n] = False # set default value - if atom_charge is None or None in atom_charge: - mol._charges = {n: 0 for n in atoms} # reset charges - else: - mol._charges = dict(enumerate(atom_charge, 1)) + if atom_charge is not None and None not in atom_charge: + for n, c in enumerate(atom_charge, 1): + atoms[n]._charge = c charge = sum(atom_charge) pb = possible_bonds(array(list(conformer.values())), array([a.atomic_radius for a in atoms.values()]), diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 56e9f3d3..6b89b226 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -47,13 +47,6 @@ def __init__(self, isotope: Optional[int] = None, *, self._stereo = stereo self._parsed_mapping = parsed_mapping - self._explicit_hydrogens = 0 - self._neighbors = 0 - self._heteroatoms = 0 - self._hybridization = 1 - self._ring_sizes = () - self._in_ring = False - def __repr__(self): if self.isotope: return f'{self.__class__.__name__}({self.isotope})' @@ -273,12 +266,6 @@ def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': copy._ring_sizes = self.ring_sizes copy._in_ring = self.in_ring else: - copy._explicit_hydrogens = 0 - copy._neighbors = 0 - copy._heteroatoms = 0 - copy._hybridization = 1 - copy._ring_sizes = () - copy._in_ring = False if hydrogens: copy._implicit_hydrogens = self.implicit_hydrogens else: diff 
--git a/chython/reactor/base.py b/chython/reactor/base.py index 30212b08..073713e4 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -100,7 +100,7 @@ def _patcher(self, structure: MoleculeContainer, mapping): # replace atom copy._atoms[n] = a = atom.copy() # noqa a._attach_graph(copy, n) # noqa - copy._calc_implicit(n) # noqa + copy.calc_implicit(n) # noqa if self.__fix_rings: copy.kekule() if not copy.thiele(fix_tautomers=self.__fix_tautomers): @@ -194,7 +194,7 @@ def __prepare_skeleton(self, structure, mapping): new._hydrogens.update(keep_hydrogens) # noqa for n in new: if n not in keep_hydrogens: - new._calc_implicit(n) # noqa + new.calc_implicit(n) # noqa return new def __set_stereo(self, new, structure, mapping): From 0e33370e4d85337c5685ca74f313258b650d4fe0 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 8 Nov 2024 16:27:06 +0100 Subject: [PATCH 12/51] Add delta_isotope support for elements Implemented delta_isotope to manage isotopic modifications and added corresponding mdl_isotope properties for multiple elements. Removed common_isotopes and updated relevant assertions for consistent isotope handling. Also updated copyright years for multiple files. 
--- chython/algorithms/aromatics/kekule.py | 2 +- chython/algorithms/calculate2d/__init__.py | 2 +- chython/algorithms/isomorphism.py | 16 +-- chython/files/_mdl/__init__.py | 4 +- chython/files/_mdl/mol.py | 26 +---- chython/files/libinchi/wrapper.py | 20 ++-- chython/periodictable/__init__.py | 28 +++-- chython/periodictable/base/element.py | 13 ++- chython/periodictable/groupI.py | 28 +++++ chython/periodictable/groupII.py | 24 ++++ chython/periodictable/groupIII.py | 128 +++++++++++++++++++++ chython/periodictable/groupIV.py | 16 +++ chython/periodictable/groupIX.py | 16 +++ chython/periodictable/groupV.py | 16 +++ chython/periodictable/groupVI.py | 16 +++ chython/periodictable/groupVII.py | 16 +++ chython/periodictable/groupVIII.py | 16 +++ chython/periodictable/groupX.py | 16 +++ chython/periodictable/groupXI.py | 16 +++ chython/periodictable/groupXII.py | 16 +++ chython/periodictable/groupXIII.py | 24 ++++ chython/periodictable/groupXIV.py | 24 ++++ chython/periodictable/groupXV.py | 24 ++++ chython/periodictable/groupXVI.py | 24 ++++ chython/periodictable/groupXVII.py | 24 ++++ chython/periodictable/groupXVIII.py | 28 +++++ 26 files changed, 526 insertions(+), 57 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index f1df888c..5a7cc494 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index a787abc5..c8fe17a5 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019, 2020 Dinar Batyrshin # This file is part of chython. # diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index a40188a6..ce9193bc 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -185,15 +185,6 @@ def _cython_compiled_structure(self): # long IV: # ring_sizes: not-in-ring bit, 3-atom ring, 4-...., 65-atom ring - from ..files._mdl.mol import common_isotopes - - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - neighbors = self.neighbors - heteroatoms = self.heteroatoms - rings_sizes = self.atoms_rings_sizes - hybridization = self.hybridization mapping = {} numbers = [] @@ -204,7 +195,7 @@ def _cython_compiled_structure(self): for i, (n, a) in enumerate(self._atoms.items()): mapping[n] = i numbers.append(n) - v2 = 1 << (hybridization(n) - 1) + v2 = 1 << (a.hybridization - 1) if (an := a.atomic_number) > 56: if an > 116: # Ts, Og an = 116 @@ -214,7 +205,7 @@ def _cython_compiled_structure(self): v1 = 1 << (57 - an) if a.isotope: - v3 = 1 << (a.isotope - common_isotopes[a.atomic_symbol] + 54) + v3 = 1 << (a.isotope - a.mdl_isotope + 54) if radicals[n]: v3 |= 0x200000000000 else: @@ -337,7 +328,6 @@ def _cython_compiled_query(self): # padding: 1 bit # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit - from ..files._mdl.mol import common_isotopes _components, _closures = self._compiled_query components = [] @@ -378,7 +368,7 @@ def _cython_compiled_query(self): v1 = 1 << (57 - n) v2 = 0 if a.isotope: - v3 = 1 << 
(a.isotope - common_isotopes[a.atomic_symbol] + 54) + v3 = 1 << (a.isotope - a.mdl_isotope + 54) if a.is_radical: v3 |= 0x200000000000 else: diff --git a/chython/files/_mdl/__init__.py b/chython/files/_mdl/__init__.py index d941f381..2310481a 100644 --- a/chython/files/_mdl/__init__.py +++ b/chython/files/_mdl/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .mol import parse_mol_v2000, common_isotopes +from .mol import parse_mol_v2000 from .emol import parse_mol_v3000 from .rxn import parse_rxn_v2000 from .erxn import parse_rxn_v3000 diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index 3879b7ea..3e15cbf9 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -19,19 +19,6 @@ from ...exceptions import EmptyMolecule, InvalidCharge, InvalidV2000 -common_isotopes = {'H': 1, 'He': 4, 'Li': 7, 'Be': 9, 'B': 11, 'C': 12, 'N': 14, 'O': 16, 'F': 19, 'Ne': 20, 'Na': 23, - 'Mg': 24, 'Al': 27, 'Si': 28, 'P': 31, 'S': 32, 'Cl': 35, 'Ar': 40, 'K': 39, 'Ca': 40, 'Sc': 45, - 'Ti': 48, 'V': 51, 'Cr': 52, 'Mn': 55, 'Fe': 56, 'Co': 59, 'Ni': 59, 'Cu': 64, 'Zn': 65, 'Ga': 70, - 'Ge': 73, 'As': 75, 'Se': 79, 'Br': 80, 'Kr': 84, 'Rb': 85, 'Sr': 88, 'Y': 89, 'Zr': 91, 'Nb': 93, - 'Mo': 96, 'Tc': 98, 'Ru': 101, 'Rh': 103, 'Pd': 106, 'Ag': 108, 'Cd': 112, 'In': 115, 'Sn': 119, - 'Sb': 122, 'Te': 128, 'I': 127, 'Xe': 131, 'Cs': 133, 'Ba': 137, 'La': 139, 'Ce': 140, 'Pr': 141, - 'Nd': 144, 'Pm': 145, 'Sm': 150, 'Eu': 152, 'Gd': 157, 'Tb': 159, 'Dy': 163, 'Ho': 165, 'Er': 167, - 'Tm': 169, 'Yb': 173, 'Lu': 175, 'Hf': 178, 'Ta': 181, 'W': 184, 'Re': 186, 'Os': 190, 'Ir': 192, - 'Pt': 195, 'Au': 197, 'Hg': 201, 'Tl': 204, 'Pb': 207, 'Bi': 209, 'Po': 209, 'At': 210, 'Rn': 222, - 'Fr': 223, 'Ra': 226, 'Ac': 227, 'Th': 232, 'Pa': 231, 'U': 238, 'Np': 237, 'Pu': 244, 'Am': 243, - 'Cm': 247, 'Bk': 247, 'Cf': 251, 'Es': 252, 'Fm': 257, 'Md': 258, 'No': 259, 'Lr': 260, 'Rf': 261, - 'Db': 270, 'Sg': 269, 'Bh': 270, 'Hs': 270, 'Mt': 278, 'Ds': 281, 'Rg': 281, 'Cn': 285, 'Nh': 278, - 'Fl': 289, 'Mc': 289, 'Lv': 293, 'Ts': 297, 'Og': 294} _ctf_data = {'R': 'is_radical', 'C': 'charge', 'I': 'isotope'} _charge_map = {' 0': 0, ' 1': 3, ' 2': 2, ' 3': 1, ' 4': 0, ' 5': -1, ' 6': -2, ' 7': -3} @@ -59,6 +46,7 @@ def parse_mol_v2000(data): raise InvalidCharge element = line[31:34].strip() isotope = line[34:36] + delta_isotope = None if element in 'AL': raise ValueError('queries not supported') @@ -68,17 +56,15 @@ def parse_mol_v2000(data): raise ValueError('isotope on deuterium atom') isotope = 2 elif isotope != ' 0': - try: - isotope = common_isotopes[element] + int(isotope) - except KeyError: - 
raise ValueError('invalid element symbol') + delta_isotope = int(isotope) + isotope = None else: isotope = None mapping = line[60:63] atoms.append({'element': element, 'charge': charge, 'isotope': isotope, 'is_radical': False, 'mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), - 'z': float(line[20:30])}) + 'z': float(line[20:30]), 'delta_isotope': delta_isotope}) for line in data[4 + atoms_count: 4 + atoms_count + bonds_count]: a1, a2 = int(line[0:3]) - 1, int(line[3:6]) - 1 @@ -157,4 +143,4 @@ def parse_mol_v2000(data): 'meta': None, 'log': log} -__all__ = ['parse_mol_v2000', 'common_isotopes'] +__all__ = ['parse_mol_v2000'] diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 55749f34..a3504a0b 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,6 @@ from sysconfig import get_platform from warnings import warn from .._convert import create_molecule -from .._mdl import common_isotopes from ...containers import MoleculeContainer from ...containers.bonds import Bond from ...exceptions import ValenceError, IsChiral, NotChiral @@ -54,8 +53,8 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'hydrogens': atom.implicit_hydrogens, 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, - 't': atom.implicit_tritium}) + 'hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, + 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, 't': atom.implicit_tritium}) for k in range(atom.num_bonds): m = atom.neighbor[k] @@ -200,12 +199,13 @@ def atomic_symbol(self): @property def isotope(self): - isotope = self.isotopic_mass - if not isotope: - isotope = None - elif isotope > 9000: # OVER NINE THOUSANDS! - isotope += common_isotopes[self.atomic_symbol] - 10000 - return isotope + if 0 < self.isotopic_mass < 9000: # OVER NINE THOUSANDS! 
+ return self.isotopic_mass + + @property + def delta_isotope(self): + if self.isotope > 9000: + return self.isotope - 10_000 @property def is_radical(self): diff --git a/chython/periodictable/__init__.py b/chython/periodictable/__init__.py index 5f272d31..d494564e 100644 --- a/chython/periodictable/__init__.py +++ b/chython/periodictable/__init__.py @@ -39,6 +39,7 @@ from .groupXVII import * from .groupXVIII import * + modules = {v.__name__: v for k, v in globals().items() if k.startswith('group') and k != 'groups'} elements = {k: v for k, v in globals().items() if isinstance(v, ABCMeta) and k != 'Element' and issubclass(v, Element)} @@ -48,12 +49,21 @@ __all__.extend(elements) -for _class in (DynamicElement, QueryElement): - for k, v in elements.items(): - name = f'{_class.__name__[:-7]}{k}' - globals()[name] = cls = type(name, - (_class, *v.__mro__[-3:-1]), - {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number}) - setattr(modules[v.__module__], name, cls) - modules[v.__module__].__all__.append(name) - __all__.append(name) +for k, v in elements.items(): + name = f'Dynamic{k}' + globals()[name] = cls = type(name, (DynamicElement,), + {'__module__': v.__module__, '__slots__': (), + 'atomic_number': v.atomic_number}) + setattr(modules[v.__module__], name, cls) + modules[v.__module__].__all__.append(name) + __all__.append(name) + +for k, v in elements.items(): + name = f'Query{k}' + globals()[name] = cls = type(name, (QueryElement,), + {'__module__': v.__module__, '__slots__': (), + 'atomic_number': v.atomic_number, + 'mdl_isotope': v.mdl_isotope}) + setattr(modules[v.__module__], name, cls) + modules[v.__module__].__all__.append(name) + __all__.append(name) diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 6b89b226..9014e064 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -32,12 +32,16 @@ class Element(ABC): def __init__(self, isotope: 
Optional[int] = None, *, charge: int = 0, is_radical: bool = False, x: float = 0., y: float = 0., implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, - parsed_mapping: Optional[int] = None): + parsed_mapping: Optional[int] = None, delta_isotope: Optional[int] = None): """ Element object with specified isotope :param isotope: Isotope number of element """ + if delta_isotope is not None: + assert isotope is None, 'isotope absolute value and delta value provided' + isotope = self.mdl_isotope + delta_isotope + self.isotope = isotope self.charge = charge self.is_radical = is_radical @@ -107,6 +111,13 @@ def atomic_radius(self) -> float: Valence radius of atom """ + @property + @abstractmethod + def mdl_isotope(self) -> int: + """ + MDL MOL common isotope + """ + @property def charge(self) -> int: """ diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index a7c10f55..a0505f20 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -48,6 +48,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 0.53 + @property + def mdl_isotope(self): + return 1 + class Li(Element, PeriodII, GroupI): __slots__ = () @@ -76,6 +80,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 167 + @property + def mdl_isotope(self): + return 7 + class Na(Element, PeriodIII, GroupI): __slots__ = () @@ -104,6 +112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.9 + @property + def mdl_isotope(self): + return 23 + class K(Element, PeriodIV, GroupI): __slots__ = () @@ -132,6 +144,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.43 + @property + def mdl_isotope(self): + return 39 + class Rb(Element, PeriodV, GroupI): __slots__ = () @@ -160,6 +176,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.65 + @property + def mdl_isotope(self): + return 85 + class Cs(Element, PeriodVI, GroupI): __slots__ = () @@ -188,6 +208,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.98 + @property + def mdl_isotope(self): + return 133 + class Fr(Element, PeriodVII, GroupI): __slots__ = () @@ -216,5 +240,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.98 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 223 + __all__ = ['H', 'Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'] diff --git a/chython/periodictable/groupII.py b/chython/periodictable/groupII.py index bae2cf65..8b6337d0 100644 --- a/chython/periodictable/groupII.py +++ b/chython/periodictable/groupII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.12 + @property + def mdl_isotope(self): + return 9 + class Mg(Element, PeriodIII, GroupII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 24 + class Ca(Element, PeriodIV, GroupII): __slots__ = () @@ -110,6 +118,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.94 + @property + def mdl_isotope(self): + return 40 + class Sr(Element, PeriodV, GroupII): __slots__ = () @@ -138,6 +150,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.19 + @property + def mdl_isotope(self): + return 88 + class Ba(Element, PeriodVI, GroupII): __slots__ = () @@ -167,6 +183,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.53 + @property + def mdl_isotope(self): + return 137 + class Ra(Element, PeriodVII, GroupII): __slots__ = () @@ -195,5 +215,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.53 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 226 + __all__ = ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'] diff --git a/chython/periodictable/groupIII.py b/chython/periodictable/groupIII.py index a2683f8d..ca11c5f1 100644 --- a/chython/periodictable/groupIII.py +++ 
b/chython/periodictable/groupIII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.84 + @property + def mdl_isotope(self): + return 45 + class Y(Element, PeriodV, GroupIII): __slots__ = () @@ -77,6 +81,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 + @property + def mdl_isotope(self): + return 89 + class La(Element, PeriodVI, GroupIII): __slots__ = () @@ -105,6 +113,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 139 + class Ce(Element, PeriodVI, GroupIII): __slots__ = () @@ -137,6 +149,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 140 + class Pr(Element, PeriodVI, GroupIII): __slots__ = () @@ -167,6 +183,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.47 + @property + def mdl_isotope(self): + return 141 + class Nd(Element, PeriodVI, GroupIII): __slots__ = () @@ -208,6 +228,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.06 + @property + def mdl_isotope(self): + return 144 + class Pm(Element, PeriodVI, GroupIII): __slots__ = () @@ -236,6 +260,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.05 + @property + def mdl_isotope(self): + return 145 + class Sm(Element, PeriodVI, GroupIII): __slots__ = () @@ -277,6 +305,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.38 + @property + def mdl_isotope(self): + return 150 + class Eu(Element, PeriodVI, GroupIII): __slots__ = () @@ -316,6 +348,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.31 + @property + def mdl_isotope(self): + return 152 + class Gd(Element, PeriodVI, GroupIII): __slots__ = () @@ -345,6 +381,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.33 + @property 
+ def mdl_isotope(self): + return 157 + class Tb(Element, PeriodVI, GroupIII): __slots__ = () @@ -375,6 +415,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.25 + @property + def mdl_isotope(self): + return 159 + class Dy(Element, PeriodVI, GroupIII): __slots__ = () @@ -406,6 +450,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.28 + @property + def mdl_isotope(self): + return 163 + class Ho(Element, PeriodVI, GroupIII): __slots__ = () @@ -445,6 +493,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.26 + @property + def mdl_isotope(self): + return 165 + class Er(Element, PeriodVI, GroupIII): __slots__ = () @@ -473,6 +525,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.26 + @property + def mdl_isotope(self): + return 167 + class Tm(Element, PeriodVI, GroupIII): __slots__ = () @@ -512,6 +568,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.22 + @property + def mdl_isotope(self): + return 169 + class Yb(Element, PeriodVI, GroupIII): __slots__ = () @@ -552,6 +612,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.22 + @property + def mdl_isotope(self): + return 173 + class Lu(Element, PeriodVI, GroupIII): __slots__ = () @@ -580,6 +644,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 + @property + def mdl_isotope(self): + return 175 + class Ac(Element, PeriodVII, GroupIII): __slots__ = () @@ -608,6 +676,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 227 + class Th(Element, PeriodVII, GroupIII): __slots__ = () @@ -641,6 +713,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 232 + class Pa(Element, PeriodVII, GroupIII): __slots__ = () @@ -671,6 +747,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 231 + class U(Element, PeriodVII, GroupIII): __slots__ = () @@ -700,6 +780,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 238 + class Np(Element, PeriodVII, GroupIII): __slots__ = () @@ -730,6 +814,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 237 + class Pu(Element, PeriodVII, GroupIII): __slots__ = () @@ -768,6 +856,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 244 + class Am(Element, PeriodVII, GroupIII): __slots__ = () @@ -796,6 +888,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 243 + class Cm(Element, PeriodVII, GroupIII): __slots__ = () @@ -824,6 +920,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 247 + class Bk(Element, PeriodVII, GroupIII): __slots__ = () @@ -852,6 +952,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 247 + class Cf(Element, PeriodVII, GroupIII): __slots__ = () @@ -880,6 +984,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 251 + class Es(Element, PeriodVII, GroupIII): __slots__ = () @@ -908,6 +1016,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 252 + class Fm(Element, PeriodVII, GroupIII): __slots__ = () @@ -936,6 +1048,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 257 + class Md(Element, PeriodVII, GroupIII): __slots__ = () @@ -964,6 +1080,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 258 + class No(Element, PeriodVII, GroupIII): __slots__ = () @@ -992,6 +1112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 259 + class Lr(Element, PeriodVII, GroupIII): __slots__ = () @@ -1020,6 +1144,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 260 + __all__ = ['Sc', 'Y', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', diff --git a/chython/periodictable/groupIV.py b/chython/periodictable/groupIV.py index c80e1482..70c626b8 100644 --- a/chython/periodictable/groupIV.py +++ b/chython/periodictable/groupIV.py @@ -80,6 +80,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.76 + @property + def mdl_isotope(self): + return 48 + class Zr(Element, PeriodV, GroupIV): __slots__ = () @@ -127,6 +131,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.06 + @property + def mdl_isotope(self): + return 91 + class Hf(Element, PeriodVI, GroupIV): __slots__ = () @@ -162,6 +170,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.08 + @property + def mdl_isotope(self): + return 
178 + class Rf(Element, PeriodVII, GroupIV): __slots__ = () @@ -190,5 +202,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.08 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 261 + __all__ = ['Ti', 'Zr', 'Hf', 'Rf'] diff --git a/chython/periodictable/groupIX.py b/chython/periodictable/groupIX.py index 97608fd9..b1fe8055 100644 --- a/chython/periodictable/groupIX.py +++ b/chython/periodictable/groupIX.py @@ -71,6 +71,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.52 + @property + def mdl_isotope(self): + return 59 + class Rh(Element, PeriodV, GroupIX): __slots__ = () @@ -108,6 +112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.73 + @property + def mdl_isotope(self): + return 103 + class Ir(Element, PeriodVI, GroupIX): __slots__ = () @@ -148,6 +156,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.8 + @property + def mdl_isotope(self): + return 192 + class Mt(Element, PeriodVII, GroupIX): __slots__ = () @@ -176,5 +188,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.8 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 278 + __all__ = ['Co', 'Rh', 'Ir', 'Mt'] diff --git a/chython/periodictable/groupV.py b/chython/periodictable/groupV.py index 66036c63..67e56d7d 100644 --- a/chython/periodictable/groupV.py +++ b/chython/periodictable/groupV.py @@ -68,6 +68,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 + @property + def mdl_isotope(self): + return 51 + class Nb(Element, PeriodV, GroupV): __slots__ = () @@ -111,6 +115,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.98 + @property + def mdl_isotope(self): + return 93 + class Ta(Element, PeriodVI, GroupV): __slots__ = () @@ -144,6 +152,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.0 + @property + def mdl_isotope(self): + return 181 + 
class Db(Element, PeriodVII, GroupV): __slots__ = () @@ -172,5 +184,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.0 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['V', 'Nb', 'Ta', 'Db'] diff --git a/chython/periodictable/groupVI.py b/chython/periodictable/groupVI.py index 03b76191..0511d734 100644 --- a/chython/periodictable/groupVI.py +++ b/chython/periodictable/groupVI.py @@ -59,6 +59,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.66 + @property + def mdl_isotope(self): + return 52 + class Mo(Element, PeriodV, GroupVI): __slots__ = () @@ -102,6 +106,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.90 + @property + def mdl_isotope(self): + return 96 + class W(Element, PeriodVI, GroupVI): __slots__ = () @@ -135,6 +143,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.93 + @property + def mdl_isotope(self): + return 184 + class Sg(Element, PeriodVII, GroupVI): __slots__ = () @@ -163,5 +175,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.93 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 269 + __all__ = ['Cr', 'Mo', 'W', 'Sg'] diff --git a/chython/periodictable/groupVII.py b/chython/periodictable/groupVII.py index 3fceee40..f754b97e 100644 --- a/chython/periodictable/groupVII.py +++ b/chython/periodictable/groupVII.py @@ -57,6 +57,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.61 + @property + def mdl_isotope(self): + return 55 + class Tc(Element, PeriodV, GroupVII): __slots__ = () @@ -86,6 +90,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.83 + @property + def mdl_isotope(self): + return 98 + class Re(Element, PeriodVI, GroupVII): __slots__ = () @@ -114,6 +122,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.88 + @property + def mdl_isotope(self): + return 186 
+ class Bh(Element, PeriodVII, GroupVII): __slots__ = () @@ -142,5 +154,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.88 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['Mn', 'Tc', 'Re', 'Bh'] diff --git a/chython/periodictable/groupVIII.py b/chython/periodictable/groupVIII.py index ea510d60..15056c3f 100644 --- a/chython/periodictable/groupVIII.py +++ b/chython/periodictable/groupVIII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 56 + class Ru(Element, PeriodV, GroupVIII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.78 + @property + def mdl_isotope(self): + return 101 + class Os(Element, PeriodVI, GroupVIII): __slots__ = () @@ -113,6 +121,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.85 + @property + def mdl_isotope(self): + return 190 + class Hs(Element, PeriodVII, GroupVIII): __slots__ = () @@ -141,5 +153,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.85 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['Fe', 'Ru', 'Os', 'Hs'] diff --git a/chython/periodictable/groupX.py b/chython/periodictable/groupX.py index 0ca6aa05..8c8b2c08 100644 --- a/chython/periodictable/groupX.py +++ b/chython/periodictable/groupX.py @@ -52,6 +52,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.49 + @property + def mdl_isotope(self): + return 59 + class Pd(Element, PeriodV, GroupX): __slots__ = () @@ -85,6 +89,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.69 + @property + def mdl_isotope(self): + return 106 + class Pt(Element, PeriodVI, GroupX): __slots__ = () @@ -118,6 +126,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.77 + @property + def mdl_isotope(self): + 
return 195 + class Ds(Element, PeriodVII, GroupX): __slots__ = () @@ -146,5 +158,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.77 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 281 + __all__ = ['Ni', 'Pd', 'Pt', 'Ds'] diff --git a/chython/periodictable/groupXI.py b/chython/periodictable/groupXI.py index 96be94af..1c80d3d5 100644 --- a/chython/periodictable/groupXI.py +++ b/chython/periodictable/groupXI.py @@ -52,6 +52,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 64 + class Ag(Element, PeriodV, GroupXI): __slots__ = () @@ -84,6 +88,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.65 + @property + def mdl_isotope(self): + return 108 + class Au(Element, PeriodVI, GroupXI): __slots__ = () @@ -116,6 +124,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.74 + @property + def mdl_isotope(self): + return 197 + class Rg(Element, PeriodVII, GroupXI): __slots__ = () @@ -144,5 +156,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.74 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 281 + __all__ = ['Cu', 'Ag', 'Au', 'Rg'] diff --git a/chython/periodictable/groupXII.py b/chython/periodictable/groupXII.py index 17a3e8cf..2b59c90b 100644 --- a/chython/periodictable/groupXII.py +++ b/chython/periodictable/groupXII.py @@ -50,6 +50,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.42 + @property + def mdl_isotope(self): + return 65 + class Cd(Element, PeriodV, GroupXII): __slots__ = () @@ -80,6 +84,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.61 + @property + def mdl_isotope(self): + return 112 + class Hg(Element, PeriodVI, GroupXII): __slots__ = () @@ -110,6 +118,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 + @property + def 
mdl_isotope(self): + return 201 + class Cn(Element, PeriodVII, GroupXII): __slots__ = () @@ -138,5 +150,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 285 + __all__ = ['Zn', 'Cd', 'Hg', 'Cn'] diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index c0d3f507..ef5243a6 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -51,6 +51,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 11 + class Al(Element, PeriodIII, GroupXIII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.18 + @property + def mdl_isotope(self): + return 27 + class Ga(Element, PeriodIV, GroupXIII): __slots__ = () @@ -115,6 +123,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.36 + @property + def mdl_isotope(self): + return 70 + class In(Element, PeriodV, GroupXIII): __slots__ = () @@ -145,6 +157,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 115 + class Tl(Element, PeriodVI, GroupXIII): __slots__ = () @@ -175,6 +191,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 204 + class Nh(Element, PeriodVII, GroupXIII): __slots__ = () @@ -203,5 +223,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 278 + __all__ = ['B', 'Al', 'Ga', 'In', 'Tl', 'Nh'] diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index 0a18f705..bd94ad60 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -50,6 +50,10 @@ def _valences_exceptions(self): def 
atomic_radius(self): return .67 + @property + def mdl_isotope(self): + return 12 + class Si(Element, PeriodIII, GroupXIV): __slots__ = () @@ -78,6 +82,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.11 + @property + def mdl_isotope(self): + return 28 + class Ge(Element, PeriodIV, GroupXIV): __slots__ = () @@ -106,6 +114,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.25 + @property + def mdl_isotope(self): + return 73 + class Sn(Element, PeriodV, GroupXIV): __slots__ = () @@ -144,6 +156,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 119 + class Pb(Element, PeriodVI, GroupXIV): __slots__ = () @@ -182,6 +198,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.54 + @property + def mdl_isotope(self): + return 207 + class Fl(Element, PeriodVII, GroupXIV): __slots__ = () @@ -210,5 +230,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.54 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 289 + __all__ = ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'] diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 218aeecc..700efe89 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -51,6 +51,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .56 + @property + def mdl_isotope(self): + return 14 + class P(Element, PeriodIII, GroupXV): __slots__ = () @@ -86,6 +90,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .98 + @property + def mdl_isotope(self): + return 31 + class As(Element, PeriodIV, GroupXV): __slots__ = () @@ -114,6 +122,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.14 + @property + def mdl_isotope(self): + return 75 + class Sb(Element, PeriodV, GroupXV): __slots__ = () @@ -143,6 +155,10 @@ def _valences_exceptions(self): def 
atomic_radius(self): return 1.33 + @property + def mdl_isotope(self): + return 122 + class Bi(Element, PeriodVI, GroupXV): __slots__ = () @@ -188,6 +204,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.43 + @property + def mdl_isotope(self): + return 209 + class Mc(Element, PeriodVII, GroupXV): __slots__ = () @@ -216,5 +236,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.43 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 289 + __all__ = ['N', 'P', 'As', 'Sb', 'Bi', 'Mc'] diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index 4791eb2a..85f72a23 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -51,6 +51,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .48 + @property + def mdl_isotope(self): + return 16 + class S(Element, PeriodIII, GroupXVI): __slots__ = () @@ -227,6 +231,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 32 + class Se(Element, PeriodIV, GroupXVI): __slots__ = () @@ -286,6 +294,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.03 + @property + def mdl_isotope(self): + return 79 + class Te(Element, PeriodV, GroupXVI): __slots__ = () @@ -336,6 +348,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.23 + @property + def mdl_isotope(self): + return 128 + class Po(Element, PeriodVI, GroupXVI): __slots__ = () @@ -369,6 +385,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.35 + @property + def mdl_isotope(self): + return 209 + class Lv(Element, PeriodVII, GroupXVI): __slots__ = () @@ -397,5 +417,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.35 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 293 + __all__ = ['O', 'S', 'Se', 'Te', 'Po', 'Lv'] diff --git 
a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index da6ce4c0..3eecfc17 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -50,6 +50,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .42 + @property + def mdl_isotope(self): + return 19 + class Cl(Element, PeriodIII, GroupXVII): __slots__ = () @@ -89,6 +93,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .79 + @property + def mdl_isotope(self): + return 35 + class Br(Element, PeriodIV, GroupXVII): __slots__ = () @@ -135,6 +143,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 0.94 + @property + def mdl_isotope(self): + return 80 + class I(Element, PeriodV, GroupXVII): __slots__ = () @@ -203,6 +215,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.15 + @property + def mdl_isotope(self): + return 127 + class At(Element, PeriodVI, GroupXVII): __slots__ = () @@ -232,6 +248,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.27 + @property + def mdl_isotope(self): + return 210 + class Ts(Element, PeriodVII, GroupXVII): __slots__ = () @@ -260,5 +280,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.27 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 297 + __all__ = ['F', 'Cl', 'Br', 'I', 'At', 'Ts'] diff --git a/chython/periodictable/groupXVIII.py b/chython/periodictable/groupXVIII.py index 849a893c..b8137593 100644 --- a/chython/periodictable/groupXVIII.py +++ b/chython/periodictable/groupXVIII.py @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .31 + @property + def mdl_isotope(self): + return 4 + class Ne(Element, PeriodII, GroupXVIII): __slots__ = () @@ -77,6 +81,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .38 + @property + def mdl_isotope(self): + return 20 + class Ar(Element, PeriodIII, GroupXVIII): __slots__ = () @@ 
-105,6 +113,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .71 + @property + def mdl_isotope(self): + return 40 + class Kr(Element, PeriodIV, GroupXVIII): __slots__ = () @@ -133,6 +145,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 84 + class Xe(Element, PeriodV, GroupXVIII): __slots__ = () @@ -172,6 +188,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.08 + @property + def mdl_isotope(self): + return 131 + class Rn(Element, PeriodVI, GroupXVIII): __slots__ = () @@ -200,6 +220,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.2 + @property + def mdl_isotope(self): + return 222 + class Og(Element, PeriodVII, GroupXVIII): __slots__ = () @@ -228,5 +252,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.2 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 294 + __all__ = ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', 'Og'] From b0921dbb1ee7ebce21a9609add59c211c57321e2 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sat, 9 Nov 2024 15:00:50 +0100 Subject: [PATCH 13/51] cache invalidation fixed for kekule and thiele --- chython/algorithms/aromatics/kekule.py | 3 +++ chython/algorithms/aromatics/thiele.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 5a7cc494..2452d320 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -52,6 +52,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool for n in atoms: self.calc_implicit(n) self.flush_cache() + self.calc_labels() return True return fixed @@ -70,6 +71,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): atoms.add(m) for n in atoms: copy.calc_implicit(n) + copy.calc_labels() yield copy def __fix_rings(self: 'MoleculeContainer'): @@ -92,6 +94,7 @@ 
def __fix_rings(self: 'MoleculeContainer'): bonds[n][m]._order = b if seen: self.flush_cache() + self.calc_labels() return True return False diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 0b2ce586..9c791ddc 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -164,6 +164,8 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: bonds[n][m]._order = o if not acceptors: break + self.flush_cache() + self.calc_labels() if double_bonded: # delete quinones for n in double_bonded: @@ -214,6 +216,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: bonds[n][m]._order = 4 self.flush_cache() + self.calc_labels() for ring in freaks: # aromatize rule based for q in freak_rules: if next(q.get_mapping(self, searching_scope=ring, automorphism_filter=False), None): @@ -224,6 +227,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: break if freaks: self.flush_cache() # flush again + self.calc_labels() self.fix_stereo() # check if any stereo centers vanished. return True From 3d082ef906c706a6425b96aa145a078786d602a3 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 10:49:37 +0100 Subject: [PATCH 14/51] api changes. all isomorphism labels now maintained --- chython/algorithms/rings.py | 46 ++++++++------------------- chython/containers/graph.py | 25 +++++++++++++-- chython/containers/molecule.py | 26 ++++++++------- chython/periodictable/base/element.py | 4 +-- 4 files changed, 52 insertions(+), 49 deletions(-) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 0b50b2a4..d2cecf1d 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from CachedMethods import cached_args_method from collections import defaultdict, deque from functools import cached_property from itertools import combinations @@ -33,7 +32,7 @@ class Rings: __slots__ = () @cached_property - def sssr(self) -> Tuple[Tuple[int, ...], ...]: + def sssr(self) -> List[Tuple[int, ...]]: """ Smallest Set of Smallest Rings. Special bonds ignored. @@ -47,10 +46,10 @@ def sssr(self) -> Tuple[Tuple[int, ...], ...]: """ if self.rings_count: return _sssr(self.not_special_connectivity, self.rings_count) - return () + return [] @cached_property - def atoms_rings(self) -> Dict[int, Tuple[Tuple[int, ...]]]: + def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: """ Dict of atoms rings which contains it. """ @@ -58,28 +57,17 @@ def atoms_rings(self) -> Dict[int, Tuple[Tuple[int, ...]]]: for r in self.sssr: for n in r: rings[n].append(r) - return {n: tuple(rs) for n, rs in rings.items()} + return dict(rings) @cached_property - def atoms_rings_sizes(self) -> Dict[int, Tuple[int, ...]]: + def atoms_rings_sizes(self) -> Dict[int, Set[int]]: """ Sizes of rings containing atom. """ - return {n: tuple(len(r) for r in rs) for n, rs in self.atoms_rings.items()} - - @cached_args_method - def is_ring_bond(self: 'Graph', n: int, m: int, /) -> bool: - """ - Check is bond in any ring. - """ - self.bond(n, m) # check if bond exists - try: - return not set(self.atoms_rings[n]).isdisjoint(self.atoms_rings[m]) - except KeyError: - return False + return {n: {len(r) for r in rs} for n, rs in self.atoms_rings.items()} @cached_property - def ring_atoms(self): + def ring_atoms(self) -> Set[int]: """ Atoms in rings. Not SSSR based fast algorithm. 
""" @@ -136,13 +124,11 @@ def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: return bonds @cached_property - def connected_components(self: 'Graph') -> Tuple[Tuple[int, ...], ...]: + def connected_components(self: 'Graph') -> List[Set[int]]: """ Isolated components of single graph. E.g. salts as ion pair. """ - if not self._atoms: - return () - return tuple(tuple(x) for x in self._connected_components) + return _connected_components(self._bonds) @property def connected_components_count(self) -> int: @@ -158,12 +144,8 @@ def skin_graph(self: 'Graph') -> Dict[int, Set[int]]: """ return _skin_graph(self._bonds) - @cached_property - def _connected_components(self: 'Graph') -> List[Set[int]]: - return _connected_components(self._bonds) - -def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> Tuple[Tuple[int, ...], ...]: +def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> List[Tuple[int, ...]]: """ Smallest Set of Smallest Rings of any adjacency matrix. Number of rings required. 
@@ -529,7 +511,7 @@ def _connected_rings(rings, seen_rings): def _rings_filter(rings, n_sssr): c = next(rings) if n_sssr == 1: - return c, + return [c] seen_rings = {c} sssr_atoms = set(c) @@ -545,7 +527,7 @@ def _rings_filter(rings, n_sssr): sssr_atoms.update(c) sssr.append(c) if len(sssr) == n_sssr: - return tuple(sssr) + return sssr # now we have set of plug rings (cuban fullerene), besiege rings and condensed trash seen_rings = {c: _ring_adjacency(c) for c in seen_rings} # prepare adjacency @@ -558,7 +540,7 @@ def _rings_filter(rings, n_sssr): condensed_rings = _connected_rings(condensed_rings, seen_rings) sssr.append(c) if len(sssr) == n_sssr: - return tuple(sorted(sssr, key=len)) + return sorted(sssr, key=len) raise ImplementationError('SSSR count not reached') diff --git a/chython/containers/graph.py b/chython/containers/graph.py index fe3dc720..7fa5dead 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -101,7 +101,7 @@ def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: self._atoms[n] = atom self._bonds[n] = {} - self.flush_cache() + self.flush_cache(keep_sssr=True) return n @abstractmethod @@ -169,8 +169,27 @@ def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): u._bonds.update(other._bonds) return u - def flush_cache(self): - self.__dict__.clear() + def flush_cache(self, *, keep_sssr=False, keep_components=False): + backup = {} + if keep_sssr: + # good to keep if no new bonds or bonds deletions or bonds to/from any change + if 'sssr' in self.__dict__: + backup['sssr'] = self.sssr + if 'atoms_rings' in self.__dict__: + backup['atoms_rings'] = self.atoms_rings + if 'atoms_rings_sizes' in self.__dict__: + backup['atoms_rings_sizes'] = self.atoms_rings_sizes + if 'ring_atoms' in self.__dict__: + backup['ring_atoms'] = self.ring_atoms + if 'not_special_connectivity' in self.__dict__: + backup['not_special_connectivity'] = self.not_special_connectivity + if 'rings_count' in self.__dict__: + 
backup['rings_count'] = self.rings_count + if keep_components: + # good to keep if no new bonds or bonds deletions + if 'connected_components' in self.__dict__: + backup['connected_components'] = self.connected_components + self.__dict__ = backup def __copy__(self): return self.copy() diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index c96fb713..be079c02 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -169,7 +169,7 @@ def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=Fals else: self._changed.add(n) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() return n def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): @@ -192,7 +192,7 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): self._changed.add(n) self._changed.add(m) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() self.fix_stereo() def delete_atom(self, n: int, *, _skip_calculation=False): @@ -213,7 +213,7 @@ def delete_atom(self, n: int, *, _skip_calculation=False): else: self._changed.add(m) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() self.fix_stereo() def delete_bond(self, n: int, m: int, *, _skip_calculation=False): @@ -232,7 +232,7 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): self._changed.add(n) self._changed.add(m) if not _skip_calculation and self._backup is None: - self.fix_labels() + self.fix_structure() self.fix_stereo() def copy(self) -> 'MoleculeContainer': @@ -321,7 +321,7 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul sbn[m] = sb[m][n] elif m in atoms: sbn[m] = bond.copy(stereo=True) - sub.fix_labels(recalculate_hydrogens=recalculate_hydrogens) + sub.fix_structure(recalculate_hydrogens=recalculate_hydrogens) sub.fix_stereo() return sub @@ -693,22 +693,21 @@ 
def _augmented_substructure(self, atoms: Iterable[int], deep: int): nodes.append(n) return nodes - def fix_labels(self, recalculate_hydrogens=True): + def fix_structure(self, recalculate_hydrogens=True): """ - Fix molecule internal represenation + Fix molecule internal representation """ - if not self._changed: - return - self.calc_labels() # refresh all labels if recalculate_hydrogens: - for n in self._changed: + for n in (self._changed or self._atoms): self.calc_implicit(n) # fix Hs count self._changed = None def calc_labels(self): atoms = self._atoms + atoms_rings_sizes = self.atoms_rings_sizes # expensive: sssr based + for n, m_bond in self._bonds.items(): neighbors = 0 heteroatoms = 0 @@ -741,6 +740,9 @@ def calc_labels(self): atom._hybridization = hybridization atom._explicit_hydrogens = explicit_hydrogens + atom._in_ring = n in atoms_rings_sizes + atom._ring_sizes = atoms_rings_sizes.get(n) or set() + def calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules @@ -868,7 +870,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._name = backup._name self.flush_cache() else: # update internal state - self.fix_labels() + self.fix_structure() self.fix_stereo() self._backup = None # drop backup diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 9014e064..88ca210e 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -244,7 +244,7 @@ def hybridization(self): return self._hybridization @property - def ring_sizes(self) -> Tuple[int, ...]: + def ring_sizes(self) -> Set[int]: """ Atom rings sizes. 
""" @@ -274,7 +274,7 @@ def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': copy._neighbors = self.neighbors copy._heteroatoms = self.heteroatoms copy._hybridization = self.hybridization - copy._ring_sizes = self.ring_sizes + copy._ring_sizes = self.ring_sizes.copy() copy._in_ring = self.in_ring else: if hydrogens: From 269da1a42ea0dc98ec6c847ffb5b97658c54ceaa Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 10:53:03 +0100 Subject: [PATCH 15/51] fixed aromaticity handling --- chython/algorithms/aromatics/kekule.py | 8 ++++++-- chython/algorithms/aromatics/thiele.py | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 2452d320..f7d90918 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -51,7 +51,7 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool atoms.add(m) for n in atoms: self.calc_implicit(n) - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.calc_labels() return True return fixed @@ -78,6 +78,7 @@ def __fix_rings(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds seen = set() + keep = True for q, af, bf, mm in rules: for mapping in q.get_mapping(self, automorphism_filter=False): match = set(mapping.values()) @@ -92,8 +93,11 @@ def __fix_rings(self: 'MoleculeContainer'): n = mapping[n] m = mapping[m] bonds[n][m]._order = b + if b == 8: + # flush sssr and components cache + keep = False if seen: - self.flush_cache() + self.flush_cache(keep_sssr=keep, keep_components=keep) self.calc_labels() return True return False diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 9c791ddc..f236e887 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -164,7 +164,7 @@ def thiele(self: 'MoleculeContainer', *, 
fix_tautomers=True) -> bool: bonds[n][m]._order = o if not acceptors: break - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.calc_labels() if double_bonded: # delete quinones @@ -215,7 +215,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: for n, m in zip(ring, ring[1:]): bonds[n][m]._order = 4 - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.calc_labels() for ring in freaks: # aromatize rule based for q in freak_rules: @@ -226,7 +226,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: bonds[n][m]._order = 4 break if freaks: - self.flush_cache() # flush again + self.flush_cache(keep_sssr=True, keep_components=True) # flush again self.calc_labels() self.fix_stereo() # check if any stereo centers vanished. return True From f7d8e899bd85e194cdba63ca381756d5d290d6ac Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 12:54:45 +0100 Subject: [PATCH 16/51] refactored standardization --- chython/algorithms/morgan.py | 11 +- chython/algorithms/standardize/molecule.py | 165 ++++++++----------- chython/algorithms/standardize/resonance.py | 61 ++++--- chython/algorithms/standardize/salts.py | 37 ++--- chython/algorithms/standardize/saturation.py | 30 ++-- 5 files changed, 130 insertions(+), 174 deletions(-) diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index 659c50c8..36086ada 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -40,13 +40,12 @@ def atoms_order(self: 'Graph') -> Dict[int, int]: :return: dict of atom-order pairs """ - atoms = self._atoms - if not atoms: # for empty containers + if not self._atoms: # for empty containers return {} - elif len(atoms) == 1: # optimize single atom containers - return dict.fromkeys(atoms, 1) + elif len(self._atoms) == 1: # optimize single atom containers + return dict.fromkeys(self._atoms, 1) ring = self.ring_atoms - return _morgan({n: hash((hash(a), n in ring)) for n, a in atoms.items()}, self.int_adjacency) + return _morgan({n: hash((hash(a), n in ring)) for n, a in self._atoms.items()}, self.int_adjacency) @cached_property def int_adjacency(self: 'Graph') -> Dict[int, Dict[int, int]]: diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index c9fb0893..049671a2 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -50,7 +50,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F h, changed = self.implicify_hydrogens(_fix_stereo=False, logging=True) if fix_tautomers and (logging or keep_kekule): # thiele can change tautomeric form - hgs = self._hydrogens.copy() + hgs = {n: a.implicit_hydrogens for n, a in self._atoms.items()} if keep_kekule: # save bond orders bonds = [(b, b.order) for _, _, b in self.bonds()] @@ -65,8 +65,9 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F self.kekule() # we need to do full kekule again else: for b, o in bonds: # noqa - b._Bond__order = o # noqa - self.flush_cache() + b._order = o + self.flush_cache() + self.calc_labels() if logging: if k: @@ -75,13 +76,12 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F s.append((tuple(changed), -1, 'implicified')) if t: s.append(((), -1, 'aromatized')) - if fix_tautomers and hgs != self._hydrogens: - 
s.append((tuple(x for x, y in self._hydrogens.items() if hgs[x] != y), - -1, 'aromatic tautomer found')) + if fix_tautomers and (x := tuple(n for n, a in self._atoms.items() if hgs[n] != a.implicit_hydrogens)): + s.append((x, -1, 'aromatic tautomer found')) if c: s.append((tuple(c), -1, 'recharged')) if keep_kekule and t: - if c or fix_tautomers and hgs != self._hydrogens: + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self._atoms.items()): s.append(((), -1, 'kekulized again')) else: s.append(((), -1, 'kekule form restored')) @@ -118,16 +118,14 @@ def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=Fals log.extend(l) fixed.update(f) - if b := fixed.intersection(n for n, h in self._hydrogens.items() if h is None): + if b := fixed.intersection(n for n, a in self._atoms.items() if a.implicit_hydrogens is None): if ignore: log.append((tuple(b), -1, 'standardization failed')) else: raise ImplementationError(f'standardization leads to invalid valences: {b}') - if fixed: - self.flush_cache() - if _fix_stereo: - self.fix_stereo() + if fixed and _fix_stereo: + self.fix_stereo() if logging: if fixed: @@ -146,10 +144,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol changed: List[int] = [] bonds = self._bonds nsc = self.not_special_connectivity - hydrogens = self._hydrogens - charges = self._charges atoms = self._atoms - hybridization = self.hybridization if prepare_molecule: self.thiele() @@ -165,25 +160,25 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol # if not 2 neighbors and 1 hydrogen or 3 neighbors within 1st and second atoms - break atom_1, atom_2 = mapping[1], mapping[2] if len(bonds[atom_1]) == 2: - if not hydrogens[atom_1]: + if not atoms[atom_1].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_1].values()): continue if len(bonds[atom_2]) == 2: - if not hydrogens[atom_2]: + if not atoms[atom_2].implicit_hydrogens: continue elif 
all(x == 4 for x in bonds[atom_2].values()): continue if fix: atom_3 = mapping[3] - charges[atom_3] = 0 + atoms[atom_3]._charge = 0 changed.append(atom_3) else: - charges[atom_1] = 0 + atoms[atom_1]._charge = 0 changed.append(atom_1) - charges[atom_2] = 1 + atoms[atom_2]._charge = 1 changed.append(atom_2) # add atoms to changed # morgan @@ -196,36 +191,36 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol seen.update(match) atom_1, atom_2 = mapping[1], mapping[2] if len(bonds[atom_1]) == 2: - if not hydrogens[atom_1]: + if not atoms[atom_1].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_1].values()): continue if len(bonds[atom_2]) == 2: - if not hydrogens[atom_2]: + if not atoms[atom_2].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_2].values()): continue if fix: atom_3 = mapping[3] - charges[atom_3] = 0 + atoms[atom_3]._charge = 0 changed.append(atom_3) else: # remove charge from 1st N atom - charges[atom_1] = 0 + atoms[atom_1]._charge = 0 pairs.append((atom_1, atom_2, fix)) if pairs: self.__dict__.pop('atoms_order', None) # remove cached morgan for atom_1, atom_2, fix in pairs: if self.atoms_order[atom_1] > self.atoms_order[atom_2]: - charges[atom_2] = 1 + atoms[atom_2]._charge = 1 changed.append(atom_2) if not fix: changed.append(atom_1) else: - charges[atom_1] = 1 + atoms[atom_1]._charge = 1 if fix: changed.append(atom_1) del self.__dict__['atoms_order'] # remove invalid morgan @@ -233,9 +228,9 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol # ferrocene fcr = [] for r in self.sssr: - if len(r) != 5 or not all(hybridization(n) == 4 for n in r): + if len(r) != 5 or not all(atoms[n].hybridization == 4 for n in r): continue - ch = [(n, x) for n in r if (x := charges[n])] + ch = [(n, x) for n in r if (x := atoms[n].charge)] if len(ch) != 1 or ch[0][1] != -1: continue ch = ch[0][0] @@ -243,19 +238,19 @@ def standardize_charges(self: 'MoleculeContainer', *, 
logging=False, prepare_mol (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b.order == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue - charges[ch] = 0 # reset charge for morgan recalculation + atoms[ch]._charge = 0 # reset charge for morgan recalculation fcr.append(ca) changed.append(ch) if fcr: self.__dict__.pop('atoms_order', None) # remove cached morgan for ca in fcr: n = min(ca, key=self.atoms_order.get) - charges[n] = -1 + atoms[n]._charge = -1 changed.append(n) del self.__dict__['atoms_order'] # remove invalid morgan if changed: - self.flush_cache() # clear cache + self.flush_cache(keep_sssr=True, keep_components=True) # clear cache if _fix_stereo: self.fix_stereo() if logging: @@ -284,7 +279,8 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, del bonds[n][m], bonds[m][n] if ab: - self.flush_cache() + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() return len(ab) @@ -299,12 +295,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo :param logging: return list of changed atoms. 
""" atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - plane = self._plane - hydrogens = self._hydrogens - parsed_mapping = self._parsed_mapping explicit = defaultdict(list) for n, atom in atoms.items(): @@ -322,8 +313,6 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo fixed = {} for n, hs in explicit.items(): atom = atoms[n] - charge = charges[n] - is_radical = radicals[n] len_h = len(hs) for i in range(len_h, 0, -1): hi = hs[:i] @@ -335,7 +324,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo explicit_dict[(bond.order, atoms[m].atomic_number)] += 1 try: # aromatic rings don't match any rule - rules = atom.valence_rules(charge, is_radical, explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: break for s, d, h in rules: @@ -349,23 +338,15 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo for n in to_remove: del atoms[n] - del charges[n] - del radicals[n] - del plane[n] - del hydrogens[n] for m in bonds.pop(n): del bonds[m][n] - try: - del parsed_mapping[n] - except KeyError: - pass for n, h in fixed.items(): - hydrogens[n] = h + atoms[n]._implicit_hydrogens = h if to_remove: - self.flush_cache() - self._conformers = [{x: y for x, y in c.items() if x not in to_remove} for c in self._conformers] # noqa + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() @@ -380,26 +361,28 @@ def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_ma :return: number of added atoms """ - hydrogens = self._hydrogens + atoms = self._atoms to_add = [] - for n, h in hydrogens.items(): + for n, a in atoms.items(): try: - to_add.extend([n] * h) + to_add.extend([n] * a.implicit_hydrogens) except TypeError: raise ValenceError(f'atom {n} has valence error') if to_add: log = [] bonds = self._bonds - m = start_map + m = start_map if start_map is not None else 
max(atoms) + 1 for n in to_add: - m = self.add_atom(H(), m) - bonds[n][m] = bonds[m][n] = b = Bond(1) - b._attach_graph(self, n, m) - hydrogens[n] = 0 + atoms[m] = H(implicit_hydrogens=0) + bonds[n][m] = b = Bond(1) + bonds[m] = {n: b} + atoms[n]._implicit_hydrogens = 0 log.append((n, m)) m += 1 + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() if _return_map: @@ -415,35 +398,33 @@ def check_valence(self: 'MoleculeContainer') -> List[int]: :return: list of invalid atoms """ - return [n for n, h in self._hydrogens.items() if h is None] # only invalid atoms have None hydrogens. + # only invalid atoms have None hydrogens. + return [n for n, a in self._atoms.items() if a.implicit_hydrogens is None] def clean_isotopes(self: 'MoleculeContainer') -> bool: """ Clean isotope marks from molecule. Return True if any isotope found. """ - atoms = self._atoms - isotopes = [x for x in atoms.values() if x.isotope] + isotopes = [x for x in self._atoms.values() if x.isotope] if isotopes: for i in isotopes: i._isotope = None - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) self.fix_stereo() return True return False def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): + atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals - calc_implicit = self.calc_implicit log = [] fixed = set() - flush = False for r, (pattern, atom_fix, bonds_fix, any_atoms, is_tautomer) in enumerate(rules): if not fix_tautomers and is_tautomer: continue + keep_sssr = keep_components = True hs = set() seen = set() for mapping in pattern.get_mapping(self, automorphism_filter=False): @@ -457,53 +438,37 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): for n, (ch, ir) in atom_fix.items(): n = mapping[n] hs.add(n) - charges[n] += ch - if charges[n] > 4: - charges[n] -= ch + a = atoms[n] + a._charge += ch + if a.charge > 4: + a._charge -= ch log.append((tuple(match), r, f'bad 
charge formed. changes omitted: {pattern}')) break # skip changes if ir is not None: - radicals[n] = ir + a._is_radical = ir else: - for n, m, b in bonds_fix: + for n, m, bo in bonds_fix: n = mapping[n] m = mapping[m] hs.add(n) hs.add(m) if m in bonds[n]: - bonds[n][m]._Bond__order = b # noqa - if b == 8: - # expected original molecule don't contain `any` bonds or these bonds not changed - flush = True - else: - if b != 8: - flush = True - bonds[n][m] = bonds[m][n] = b = Bond(b) - b._attach_graph(self, n, m) + b = bonds[n][m] + if b.order == 8 or b == 8: + keep_sssr = False + b._order = bo + else: # new bond + keep_sssr = keep_components = False + bonds[n][m] = bonds[m][n] = Bond(bo) log.append((tuple(match), r, str(pattern))) if not hs: # not matched continue - # flush cache only for changed atoms. - if flush: # neighbors count changed - ngb = self.__dict__['__cached_args_method_neighbors'] - for n in hs: - try: - del ngb[(n,)] - except KeyError: - pass - del self.__dict__['bonds_count'] - flush = False - # need hybridization recalculation - hyb = self.__dict__['__cached_args_method_hybridization'] - for n in hs: - try: - del hyb[(n,)] - except KeyError: # already flushed before - pass + self.flush_cache(keep_sssr=keep_sssr, keep_components=keep_components) + # recalculate isomorphism labels + self.calc_labels() for n in hs: # hydrogens count recalculation - calc_implicit(n) - del self.__dict__['_cython_compiled_structure'] + self.calc_implicit(n) fixed.update(hs) return log, fixed diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 696b977c..31f0a0da 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -35,21 +35,18 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals :param logging: return list of changed atoms. """ atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - calc_implicit = self.calc_implicit entries, exits, rads, constrains, nitrogen_cat, nitrogen_ani, sulfur_cat = self.__entries() hs = set() while len(rads) > 1: n = rads.pop() for path in self.__find_delocalize_path(n, rads, constrains, True): - radicals[n] = False + atoms[n]._is_radical = False hs.add(n) for n, m, b in path: hs.add(m) - bonds[n][m]._Bond__order = b # noqa - radicals[m] = False # noqa + bonds[n][m]._order = b + atoms[m]._is_radical = False # noqa rads.discard(m) break # path found # path not found. atom n keep as is @@ -60,29 +57,31 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals if n in nitrogen_cat and m in nitrogen_ani: continue - c_m = charges[m] - 1 if m in sulfur_cat: # prevent X-[S+]=X >> X=S=X if b != 1: continue + atoms[m]._charge -= 1 else: # check cations end valence. + atoms[m]._charge -= 1 # reduce atom change and check valence try: - atoms[m].valence_rules(c_m, radicals[m], sum(int(y) for x, y in bonds[m].items() if x != l) + b) + atoms[m].valence_rules(sum(int(y) for x, y in bonds[m].items() if x != l) + b) except ValenceError: + atoms[m]._charge += 1 # roll back continue - charges[n] += 1 + # succeed! + atoms[n]._charge += 1 hs.add(n) for n, m, b in path: hs.add(m) - bonds[n][m]._Bond__order = b # noqa - charges[m] = c_m + bonds[n][m]._order = b exits.discard(m) break # path from negative atom to positive atom found. # path not found. 
keep negative atom n as is if hs: for n in hs: - calc_implicit(n) - self.flush_cache() + self.calc_implicit(n) + self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() if logging: @@ -121,13 +120,9 @@ def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, if n not in seen and n in constrains and 1 <= (bo := b.order + diff) <= 3) def __entries(self: 'MoleculeContainer'): - hybridization = self.hybridization - neighbors = self.neighbors - charges = self._charges - radicals = self._radicals - bonds = self._bonds atoms = self._atoms - errors = {n for n, h in self._hydrogens.items() if h is None} + bonds = self._bonds + errors = {n for n, a in atoms.items() if a.implicit_hydrogens is None} transfer = set() entries = set() @@ -140,9 +135,9 @@ def __entries(self: 'MoleculeContainer'): if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52}: # filter non-organic set, halogens and aromatics continue - elif radicals[n]: + elif a.is_radical: rads.add(n) - elif charges[n] == -1: + elif a.charge == -1: if (lb := len(bonds[n])) == 4 and a.atomic_number == 5: # skip boron continue elif lb == 6 and a.atomic_number == 15: # skip [P-]X6 @@ -150,35 +145,37 @@ def __entries(self: 'MoleculeContainer'): if n in errors: # only valid anions accepted continue entries.add(n) - elif charges[n] == 1: + elif a.charge == 1: lb = len(bonds[n]) if a.atomic_number == 7: if lb == 4: # skip ammonia continue - elif lb == 2 and hybridization(n) == 3: # skip Azide + elif lb == 2 and a.hybridization == 3: # skip Azide (n1, b1), (n2, b2) = bonds[n].items() - if b1.order == b2.order == 2 and (charges[n1] == -1 and atoms[n1].atomic_number == 7 or - charges[n2] == -1 and atoms[n2].atomic_number == 7): + an1 = atoms[n1] + an2 = atoms[n2] + if b1.order == b2.order == 2 and (an1.charge == -1 and an1.atomic_number == 7 or + an2.charge == -1 and an2.atomic_number == 7): continue - elif lb == 3 and hybridization(n) == 2: # X=[N+](-X)-X - prevent 
N-N migration + elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) elif a.atomic_number == 15 and lb == 4: # skip [P+]R4 continue elif a.atomic_number == 16: - if lb == 2 and hybridization(n) == 2: # ad-hoc for X-[S+]=X + if lb == 2 and a.hybridization == 2: # ad-hoc for X-[S+]=X sulfur_cat.add(n) - elif lb == 3 and hybridization(n) == 1: # ad-hoc for X-[S+](-X)-X + elif lb == 3 and a.hybridization == 1: # ad-hoc for X-[S+](-X)-X continue exits.add(n) transfer.add(n) if exits or entries: # try to move cation to nitrogen. saturation fixup. for n, a in self._atoms.items(): - if a.atomic_number == 7 and not charges[n]: - if hybridization(n) == 1 and neighbors(n) <= 3: # any amine - potential e-donor + if a.atomic_number == 7 and not a.charge: + if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) nitrogen_cat.add(n) - elif hybridization(n) == 3 and neighbors(n) == 1: # N#X-[X-] >> [N-]=X=X + elif a.hybridization == 3 and a.neighbors == 1: # N#X-[X-] >> [N-]=X=X exits.add(n) nitrogen_ani.add(n) return entries, exits, rads, transfer, nitrogen_cat, nitrogen_ani, sulfur_cat diff --git a/chython/algorithms/standardize/salts.py b/chython/algorithms/standardize/salts.py index 08a34250..d281b593 100644 --- a/chython/algorithms/standardize/salts.py +++ b/chython/algorithms/standardize/salts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -33,16 +33,20 @@ def remove_metals(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Li :param logging: return deleted atoms list. 
""" + atoms = self._atoms bonds = self._bonds metals = [] - for n, a in self._atoms.items(): - if a.atomic_symbol not in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: + for n, a in atoms.items(): + if a.atomic_number in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: metals.append(n) if 0 < len(metals) < len(self): for n in metals: - self.delete_atom(n) + del atoms[n] + del bonds[n] + + self.flush_cache(keep_sssr=True) if logging: return metals return True @@ -64,27 +68,12 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis log.extend(c) if 0 < len(log) < len(self): # prevent singularity atoms = self._atoms - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - plane = self._plane bonds = self._bonds - parsed_mapping = self._parsed_mapping - - self._conformers.clear() # clean conformers. for n in log: del atoms[n] - del charges[n] - del radicals[n] - del hydrogens[n] - del plane[n] del bonds[n] - try: - del parsed_mapping[n] - except KeyError: - pass self.flush_cache() if logging: return log @@ -99,10 +88,10 @@ def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool :param logging: return deleted bonds list. 
""" + atoms = self._atoms bonds = self._bonds - charges = self._charges - metals = [n for n, a in self._atoms.items() if a.atomic_number in + metals = [n for n, a in atoms.items() if a.atomic_number in {3, 4, 11, 12, 19, 20, 37, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102}] if metals: @@ -114,12 +103,12 @@ def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool for n in metals: for m in acceptors & bonds[n].keys(): - if charges[n] == 4: # prevent overcharging + if atoms[n].charge == 4: # prevent overcharging break del bonds[n][m] del bonds[m][n] - charges[n] += 1 - charges[m] -= 1 + atoms[n]._charge += 1 + atoms[m]._charge -= 1 log.append((n, m)) if log: self.flush_cache() diff --git a/chython/algorithms/standardize/saturation.py b/chython/algorithms/standardize/saturation.py index df9de68a..38c5bb1e 100644 --- a/chython/algorithms/standardize/saturation.py +++ b/chython/algorithms/standardize/saturation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -72,13 +72,17 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, raise ValenceError('only single bonded skeleton can be saturated') atoms = self._atoms if not reset_electrons: - expected_radicals_count = any(self._radicals.values()) + expected_radicals_count = sum(a.is_radical for a in atoms.values()) expected_charge = int(self) + if reset_electrons: + charges = {x: None for x in self._atoms} + radicals = {x: None for x in self._atoms} + else: + charges = {n: a.charge for n, a in self._atoms.items()} + radicals = {n: a.is_radical for n, a in self._atoms.items()} sat, adjacency = _find_possible_valences(atoms, neighbors_distances or self._bonds, - {x: None for x in self._atoms} if reset_electrons else self._charges, - {x: None for x in self._atoms} if reset_electrons else self._radicals, - neighbors_distances is not None) + charges, radicals, neighbors_distances is not None) charges = {} # new charge states radicals = {} # new radical states bonds = {n: {} for n in atoms} # new bonds @@ -95,8 +99,7 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, radicals[n] = r for m in env: if m not in seen: - bonds[n][m] = bonds[m][n] = b = Bond(1) - b._attach_graph(self, n, m) + bonds[n][m] = bonds[m][n] = Bond(1) else: unsaturated[n] = [(c, r, h)] else: @@ -142,8 +145,7 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, return False for n, m, b in sb: - bonds[n][m] = bonds[m][n] = b = Bond(b) - b._attach_graph(self, n, m) + bonds[n][m] = bonds[m][n] = Bond(b) for n, c, r in sa: charges[n] = c radicals[n] = r @@ -155,10 +157,14 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, return False # reset molecule self._bonds = bonds - self._radicals = radicals - self._charges = charges - self._hydrogens = {x: 0 for x in atoms} # reset invalid hydrogens counts. 
+ for n, r in radicals.items(): + atoms[n]._is_radical = r + for n, c in charges.items(): + atoms[n]._charge = c + for a in atoms.values(): + a._implicit_hydrogens = 0 # reset invalid hydrogens counts. self.flush_cache() + self.calc_labels() if logging: if not log: # check for errors log.append('Saturated successfully') From 0f46bc23ef72e56d0300c8e7ead355c58ec1d2b1 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 12:57:58 +0100 Subject: [PATCH 17/51] fix --- chython/algorithms/standardize/reaction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 17128417..1cb20f28 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2022 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # Copyright 2021 Timur Gimadiev # Copyright 2024 Philippe Gantzer # This file is part of chython. 
@@ -90,7 +90,7 @@ def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: """ total = False for m in self.molecules(): - if m.thiele(fix_tautomers=fix_tautomers) and not total: + if m.thiele(fix_tautomers=fix_tautomers): total = True if total: self.flush_cache() @@ -105,7 +105,7 @@ def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: """ total = False for m in self.molecules(): - if m.kekule(buffer_size=buffer_size) and not total: + if m.kekule(buffer_size=buffer_size): total = True if total: self.flush_cache() @@ -118,7 +118,7 @@ def clean_isotopes(self: 'ReactionContainer') -> bool: """ flag = False for m in self.molecules(): - if m.clean_isotopes() and not flag: + if m.clean_isotopes(): flag = True if flag: self.flush_cache() From 177373e57e94932bbc0081eb32b10f742715efd4 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 13:17:50 +0100 Subject: [PATCH 18/51] isomorphism fixed --- chython/algorithms/isomorphism.py | 20 ++++++++++---------- chython/containers/bonds.py | 2 -- chython/containers/molecule.py | 4 ++++ 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index ce9193bc..eb44cc71 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -106,7 +106,7 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No seen = set() if len(components) == 1: - for candidate in other._connected_components: + for candidate in other.connected_components: if searching_scope: candidate = searching_scope.intersection(candidate) if not candidate: @@ -119,7 +119,7 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No seen.add(atoms) yield mapping else: - for candidates in permutations(other._connected_components, len(components)): + for candidates in permutations(other.connected_components, len(components)): mappers = [] for component, candidate in zip(components, 
candidates): if searching_scope: @@ -206,23 +206,23 @@ def _cython_compiled_structure(self): if a.isotope: v3 = 1 << (a.isotope - a.mdl_isotope + 54) - if radicals[n]: + if a.is_radical: v3 |= 0x200000000000 else: v3 |= 0x100000000000 - elif radicals[n]: + elif a.is_radical: v3 = 0x8000200000000000 else: v3 = 0x8000100000000000 - v3 |= 1 << (charges[n] + 39) - v3 |= 1 << ((hydrogens[n] or 0) + 30) - v3 |= 1 << (neighbors(n) + 15) - v3 |= 1 << heteroatoms(n) + v3 |= 1 << (a.charge + 39) + v3 |= 1 << ((a.implicit_hydrogens or 0) + 30) + v3 |= 1 << (a.neighbors + 15) + v3 |= 1 << a.heteroatoms - if n in rings_sizes: + if a.ring_sizes: v4 = 0 - for r in rings_sizes[n]: + for r in a.ring_sizes: if r > 65: # big rings not supported continue v4 |= 1 << (65 - r) diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 79f13cad..a6ce7721 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -28,7 +28,6 @@ def __init__(self, order: int): elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') self._order = order - self._in_ring = False self._stereo = None def __eq__(self, other): @@ -72,7 +71,6 @@ def copy(self, full=False, stereo=False) -> 'Bond': copy._stereo = self.stereo copy._in_ring = self.in_ring else: - copy._in_ring = False if stereo: copy._stereo = self.stereo else: diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index be079c02..9c47cf46 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -707,13 +707,17 @@ def fix_structure(self, recalculate_hydrogens=True): def calc_labels(self): atoms = self._atoms atoms_rings_sizes = self.atoms_rings_sizes # expensive: sssr based + atoms_rings = {n: set(r) for n, r in self.atoms_rings.items()} for n, m_bond in self._bonds.items(): neighbors = 0 heteroatoms = 0 hybridization = 1 explicit_hydrogens = 0 + ar = atoms_rings[n] for m, bond in m_bond.items(): + bond._in_ring = not 
ar.isdisjoint(atoms_rings[m]) # have common rings + order = bond.order if order == 8: continue From d983bb5bc3d3b4f021d1a8c5fea258397161d1eb Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 10 Nov 2024 13:38:58 +0100 Subject: [PATCH 19/51] isomorphism fixed --- chython/algorithms/isomorphism.py | 4 ++-- chython/containers/molecule.py | 4 ++-- chython/periodictable/base/query.py | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index eb44cc71..4f8c1e74 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -22,7 +22,7 @@ from itertools import permutations from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product -from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement +from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement if TYPE_CHECKING: @@ -367,7 +367,7 @@ def _cython_compiled_query(self): else: v1 = 1 << (57 - n) v2 = 0 - if a.isotope: + if isinstance(a, QueryElement) and a.isotope: v3 = 1 << (a.isotope - a.mdl_isotope + 54) if a.is_radical: v3 |= 0x200000000000 diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 9c47cf46..fc2c7cb2 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -714,9 +714,9 @@ def calc_labels(self): heteroatoms = 0 hybridization = 1 explicit_hydrogens = 0 - ar = atoms_rings[n] + anr = atoms_rings.get(n) or False for m, bond in m_bond.items(): - bond._in_ring = not ar.isdisjoint(atoms_rings[m]) # have common rings + bond._in_ring = anr and (amr := atoms_rings.get(m) or False) and not anr.isdisjoint(amr) # have common rings order = bond.order if order == 8: diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 2089bc17..1d00a29b 100644 --- a/chython/periodictable/base/query.py +++ 
b/chython/periodictable/base/query.py @@ -268,7 +268,7 @@ def __eq__(self, other): return False if self.ring_sizes: if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): + if other.ring_sizes.isdisjoint(self.ring_sizes): return False elif other.ring_sizes: # not in ring expected return False @@ -342,7 +342,7 @@ def __eq__(self, other): return False if self.ring_sizes: if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): + if other.ring_sizes.isdisjoint(self.ring_sizes): return False elif other.ring_sizes: # not in ring expected return False @@ -407,6 +407,11 @@ def isotope(self, value: Optional[int]): raise TypeError('isotope must be an int') self._isotope = value + @property + @abstractmethod + def mdl_isotope(self) -> int: + ... + @classmethod def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: """ @@ -485,7 +490,7 @@ def __eq__(self, other): return False if self.ring_sizes: if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): + if other.ring_sizes.isdisjoint(self.ring_sizes): return False elif other.ring_sizes: # not in ring expected return False From acb4ad90da5f6d6447102f0aa8599838edb06e07 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:18:53 +0100 Subject: [PATCH 20/51] fixed depict --- chython/algorithms/depict.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 1189d32a..a48eb6c7 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -344,9 +344,6 @@ def __render_atoms(self: 'MoleculeContainer', uid): stroke_width_o = other_size * .1 stroke_width_m = mapping_size * .1 - # for cumulenes - cumulenes = {y for x in self._cumulenes(heteroatoms=True) if len(x) > 2 for y in x[1:-1]} - svg = [] maps = [] symbols = [] @@ -358,7 +355,8 @@ def __render_atoms(self: 'MoleculeContainer', uid): for n, atom in 
self._atoms.items(): x, y = atom.x, -atom.y symbol = atom.atomic_symbol - if not bonds[n] or symbol != 'C' or carbon or atom.charge or atom.is_radical or atom.isotope or n in cumulenes: + if (symbol != 'C' or atom.charge or atom.is_radical or atom.isotope or carbon + or not bonds[n] or sum(b == 2 for b in bonds[n].values()) == 2): if atom.charge: others.append(f' ' f'{_render_charge[atom.charge]}{"↑" if atom.is_radical else ""}') From d1a3909b6487dd69a13bffcbfbca607944fdadc4 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:23:44 +0100 Subject: [PATCH 21/51] new attrs of atoms. fixes. --- chython/algorithms/rings.py | 2 +- chython/algorithms/x3dom.py | 10 ++++++-- chython/files/xyz.py | 8 +++--- chython/periodictable/base/element.py | 14 +++++++++++ chython/periodictable/groupI.py | 4 +++ chython/periodictable/groupXIII.py | 8 ++++++ chython/periodictable/groupXIV.py | 24 ++++++++++++++++++ chython/periodictable/groupXV.py | 32 ++++++++++++++++++++++++ chython/periodictable/groupXVI.py | 32 ++++++++++++++++++++++++ chython/periodictable/groupXVII.py | 36 +++++++++++++++++++++++++++ 10 files changed, 162 insertions(+), 8 deletions(-) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index d2cecf1d..37cde6dc 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -51,7 +51,7 @@ def sssr(self) -> List[Tuple[int, ...]]: @cached_property def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: """ - Dict of atoms rings which contains it. + A dictionary with atom numbers as keys and a list of tuples (representing rings) as values. 
""" rings = defaultdict(list) for r in self.sssr: diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index f5da216d..2118899b 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # Copyright 2020 Dinar Batyrshin # This file is part of chython. # @@ -141,7 +141,13 @@ def depict3d(self: Union['MoleculeContainer', 'X3domMolecule'], index: int = 0) :param index: index of conformer """ - xyz = self._conformers[index] + if not hasattr(self, '_conformers'): + raise ValueError('No conformers stored within structure') + try: + xyz = self._conformers[index] + except IndexError: + raise IndexError('Invalid conformer index') + mx = sum(x for x, _, _ in xyz.values()) / len(xyz) my = sum(y for _, y, _ in xyz.values()) / len(xyz) mz = sum(z for _, _, z in xyz.values()) / len(xyz) diff --git a/chython/files/xyz.py b/chython/files/xyz.py index 612415bc..a77a8489 100644 --- a/chython/files/xyz.py +++ b/chython/files/xyz.py @@ -31,16 +31,13 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, mol = _cls() conformer = {} - mol._conformers.append(conformer) + mol._conformers = [conformer] atoms = mol._atoms bonds = mol._bonds for n, (a, x, y, z) in enumerate(matrix, 1): - atoms[n] = atom = Element.from_symbol(a)() + atoms[n] = Element.from_symbol(a)(x=x, y=y, implicit_hydrogens=0) bonds[n] = {} - atom.x = x - atom.y = y - atom._implicit_hydrogens = 0 conformer[n] = (x, y, z) if atom_charge is not None and None not in atom_charge: @@ -48,6 +45,7 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, atoms[n]._charge = c charge = sum(atom_charge) + mol.calc_labels() pb = possible_bonds(array(list(conformer.values())), array([a.atomic_radius for a in atoms.values()]), radius_multiplier) diff --git a/chython/periodictable/base/element.py 
b/chython/periodictable/base/element.py index 88ca210e..7818af9a 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -118,6 +118,20 @@ def mdl_isotope(self) -> int: MDL MOL common isotope """ + @property + def is_forming_single_bonds(self) -> bool: + """ + Atom can form stable covalent single bonds in molecules + """ + return False + + @property + def is_forming_double_bonds(self) -> bool: + """ + Atom can form stable covalent double bonds in molecules + """ + return False + @property def charge(self) -> int: """ diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index a0505f20..df3631f2 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -52,6 +52,10 @@ def atomic_radius(self): def mdl_isotope(self): return 1 + @property + def is_forming_single_bonds(self): + return True + class Li(Element, PeriodII, GroupI): __slots__ = () diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index ef5243a6..e89d745f 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -55,6 +55,14 @@ def atomic_radius(self): def mdl_isotope(self): return 11 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Al(Element, PeriodIII, GroupXIII): __slots__ = () diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index bd94ad60..43cca943 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -54,6 +54,14 @@ def atomic_radius(self): def mdl_isotope(self): return 12 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Si(Element, PeriodIII, GroupXIV): __slots__ = () @@ -86,6 +94,14 @@ def atomic_radius(self): def mdl_isotope(self): return 28 + @property + def is_forming_single_bonds(self): + return 
True + + @property + def is_forming_double_bonds(self): + return True + class Ge(Element, PeriodIV, GroupXIV): __slots__ = () @@ -118,6 +134,14 @@ def atomic_radius(self): def mdl_isotope(self): return 73 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Sn(Element, PeriodV, GroupXIV): __slots__ = () diff --git a/chython/periodictable/groupXV.py b/chython/periodictable/groupXV.py index 700efe89..5f031016 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -55,6 +55,14 @@ def atomic_radius(self): def mdl_isotope(self): return 14 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class P(Element, PeriodIII, GroupXV): __slots__ = () @@ -94,6 +102,14 @@ def atomic_radius(self): def mdl_isotope(self): return 31 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class As(Element, PeriodIV, GroupXV): __slots__ = () @@ -126,6 +142,14 @@ def atomic_radius(self): def mdl_isotope(self): return 75 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Sb(Element, PeriodV, GroupXV): __slots__ = () @@ -159,6 +183,14 @@ def atomic_radius(self): def mdl_isotope(self): return 122 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Bi(Element, PeriodVI, GroupXV): __slots__ = () diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index 85f72a23..0c782531 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -55,6 +55,14 @@ def atomic_radius(self): def mdl_isotope(self): return 16 + @property + def is_forming_single_bonds(self): + return True + + @property + def 
is_forming_double_bonds(self): + return True + class S(Element, PeriodIII, GroupXVI): __slots__ = () @@ -235,6 +243,14 @@ def atomic_radius(self): def mdl_isotope(self): return 32 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Se(Element, PeriodIV, GroupXVI): __slots__ = () @@ -298,6 +314,14 @@ def atomic_radius(self): def mdl_isotope(self): return 79 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Te(Element, PeriodV, GroupXVI): __slots__ = () @@ -352,6 +376,14 @@ def atomic_radius(self): def mdl_isotope(self): return 128 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Po(Element, PeriodVI, GroupXVI): __slots__ = () diff --git a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index 3eecfc17..3be4f6a7 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -54,6 +54,10 @@ def atomic_radius(self): def mdl_isotope(self): return 19 + @property + def is_forming_single_bonds(self): + return True + class Cl(Element, PeriodIII, GroupXVII): __slots__ = () @@ -97,6 +101,14 @@ def atomic_radius(self): def mdl_isotope(self): return 35 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Br(Element, PeriodIV, GroupXVII): __slots__ = () @@ -147,6 +159,14 @@ def atomic_radius(self): def mdl_isotope(self): return 80 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class I(Element, PeriodV, GroupXVII): __slots__ = () @@ -219,6 +239,14 @@ def atomic_radius(self): def mdl_isotope(self): return 127 + @property + def is_forming_single_bonds(self): + return True + + @property + def 
is_forming_double_bonds(self): + return True + class At(Element, PeriodVI, GroupXVII): __slots__ = () @@ -252,6 +280,14 @@ def atomic_radius(self): def mdl_isotope(self): return 210 + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Ts(Element, PeriodVII, GroupXVII): __slots__ = () From ebedc7f2b3d6b10a9d918bb43d4c2e899f3543eb Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:30:19 +0100 Subject: [PATCH 22/51] stereo refactored. simplified stereo in queries. now it's users problem to set it right. query isomorphism reduced to query to atom. --- chython/algorithms/isomorphism.py | 4 +- chython/algorithms/mapping/fixmapper.py | 4 +- chython/algorithms/smiles.py | 2 +- .../{stereo/molecule.py => stereo.py} | 864 +++++++++++++----- chython/algorithms/stereo/__init__.py | 23 - chython/algorithms/stereo/graph.py | 467 ---------- chython/containers/molecule.py | 4 +- chython/containers/query.py | 3 +- chython/files/daylight/smiles.py | 4 +- chython/files/libinchi/wrapper.py | 4 +- chython/periodictable/base/query.py | 189 ++-- chython/reactor/base.py | 26 +- chython/utils/rdkit.py | 2 +- 13 files changed, 722 insertions(+), 874 deletions(-) rename chython/algorithms/{stereo/molecule.py => stereo.py} (52%) delete mode 100644 chython/algorithms/stereo/__init__.py delete mode 100644 chython/algorithms/stereo/graph.py diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 4f8c1e74..8c0de0a5 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -271,9 +271,9 @@ class QueryIsomorphism(Isomorphism): def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None, _cython=True): """ - Get self to other Molecule or Query substructure mapping generator. + Get Query to Molecule substructure mapping generator. 
- :param other: Molecule or Query + :param other: Molecule :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. """ diff --git a/chython/algorithms/mapping/fixmapper.py b/chython/algorithms/mapping/fixmapper.py index 84768bdc..251eea95 100644 --- a/chython/algorithms/mapping/fixmapper.py +++ b/chython/algorithms/mapping/fixmapper.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -50,7 +50,7 @@ def fix_mapping(self: 'ReactionContainer', *, logging: bool = False) -> \ free_number = count(max(cgr) + 1) components = [(cgr.substructure(c), cgr.augmented_substructure(c, 2), # deep DEPENDS on rules! - set(c)) + c) for c in cgr.substructure(cgr.center_atoms).connected_components] r_atoms = ChainMap(*(x._atoms for x in self.reactants)) diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index b400a259..bbd43dfa 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -402,7 +402,7 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): # allene if n in self._stereo_allenes_terminals: t1, t2 = self._stereo_allenes_terminals[n] - env = self._stereo_allenes[n] + env = self.stereogenic_allenes[n] n1 = next(x for x in adjacency[t1] if x in env) n2 = next(x for x in adjacency[t2] if x in env) smi[3] = '@' if self._translate_allene_sign(n, n1, n2) else '@@' diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo.py similarity index 52% rename from chython/algorithms/stereo/molecule.py rename to chython/algorithms/stereo.py index 9415d551..7421d3f5 100644 --- a/chython/algorithms/stereo/molecule.py +++ b/chython/algorithms/stereo.py @@ -20,10 +20,9 @@ from functools import cached_property from itertools import combinations, product from logging import 
getLogger, INFO -from typing import Dict, Set, Tuple, Union, TYPE_CHECKING -from .graph import Stereo -from ..morgan import _morgan -from ...exceptions import AtomNotFound, IsChiral, NotChiral +from typing import Dict, Set, Tuple, Union, List, Optional, TYPE_CHECKING +from .morgan import _morgan +from ..exceptions import AtomNotFound, IsChiral, NotChiral logger = getLogger('chython.stereo') @@ -34,6 +33,30 @@ from chython import MoleculeContainer +# 1 2 +# \ | +# \| +# n---3 +# / +# / +# 0 +_tetrahedron_translate = {(0, 1, 2): False, (1, 2, 0): False, (2, 0, 1): False, + (0, 2, 1): True, (1, 0, 2): True, (2, 1, 0): True, + (0, 3, 1): False, (3, 1, 0): False, (1, 0, 3): False, + (0, 1, 3): True, (1, 3, 0): True, (3, 0, 1): True, + (0, 2, 3): False, (2, 3, 0): False, (3, 0, 2): False, + (0, 3, 2): True, (3, 2, 0): True, (2, 0, 3): True, + (1, 3, 2): False, (3, 2, 1): False, (2, 1, 3): False, + (1, 2, 3): True, (2, 3, 1): True, (3, 1, 2): True} +# 2 1 +# \ / +# n---m +# / \ +# 0 3 +_alkene_translate = {(0, 1): False, (1, 0): False, (0, 3): True, (3, 0): True, + (2, 3): False, (3, 2): False, (2, 1): True, (1, 2): True} + + def _pyramid_sign(n, u, v, w): # # | n / @@ -121,9 +144,245 @@ def _allene_sign(mark, u, v, w): return 0 -class MoleculeStereo(Stereo): +class MoleculeStereo: __slots__ = () + def clean_stereo(self: 'MoleculeContainer'): + """ + Remove stereo data. + """ + for a in self._atoms.values(): + a._stereo = None + for _, bs in self._bonds: + for b in bs.values(): + b._stereo = None # flush twice, but it should be still faster + self.flush_cache(keep_sssr=True, keep_components=True) + + @cached_property + def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: + """ + Carbon sp3 atom numbers. 
+ """ + tetra = [] + for n, atom in self._atoms.items(): + if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: + env = self._bonds[n] + if all(b == 1 for b in env.values()): + if sum(int(b) for b in env.values()) > 4: + continue + tetra.append(n) + return tuple(tetra) + + @cached_property + def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: + """ + All double-bonds chains (e.g. alkenes, allenes, cumulenes). + """ + atoms = self._atoms + bonds = self._bonds + + adj = defaultdict(set) # double bonds adjacency matrix + for n, atom in atoms.items(): + if atom.is_forming_double_bonds: + adj_n = adj[n].add + for m, bond in bonds[n].items(): + if bond == 2 and atoms[m].is_forming_double_bonds: + adj_n(m) + if not adj: + return [] + + terminals = [x for x, y in adj.items() if len(y) == 1] # list to keep atoms order! + cumulenes = [] + while terminals: + n = terminals.pop() + m = adj[n].pop() + path = [n, m] + while m not in terminals: + if len(bonds[m]) > 2: # not cumulene. SO3, SO4- etc. + cumulenes.extend(zip(path, path[1:])) # keep single double bonds instead of cumulene chain. + break + adj_m = adj[m] + adj_m.discard(n) + n, m = m, adj_m.pop() + path.append(m) + else: + terminals.remove(m) + adj[m].pop() + cumulenes.append(tuple(path)) + return cumulenes + + @cached_property + def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, int]]]: + """ + Tetrahedrons which contains at least 3 non-hydrogen neighbors and corresponding neighbors order. + """ + # 2 + # | + # 1--K--3 + # | + # 4? 
+ atoms = self._atoms + bonds = self._bonds + tetrahedrons = {} + for n in self.tetrahedrons: + if any(not atoms[x].is_forming_single_bonds for x in bonds[n]): + continue # skip metal-carbon complexes + env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) + if len(env) in (3, 4): + tetrahedrons[n] = env + return tetrahedrons + + @cached_property + def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: + """ + Cumulenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. + """ + # 5 4 + # \ / + # 2---3 + # / \ + # 1 6 + bonds = self._bonds + atoms = self._atoms + cumulenes = {} + for path in self.cumulenes: + nf = bonds[path[0]] + nl = bonds[path[-1]] + n1, m1 = path[1], path[-2] + if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 + for m, b in nf.items() if m != n1): + continue # skip X=C=C structures and metal-carbon complexes + if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 + for m, b in nl.items() if m != m1): + continue # skip X=C=C structures and metal-carbon complexes + nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b != 8] + mn = [x for x, b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b != 8] + if nn and mn: + sn = nn[1] if len(nn) == 2 else None + sm = mn[1] if len(mn) == 2 else None + cumulenes[path] = (nn[0], mn[0], sn, sm) + return cumulenes + + @cached_property + def stereogenic_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: + """ + Allenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. 
+ """ + return {path[len(path) // 2]: env for path, env in self.stereogenic_cumulenes.items() if len(path) % 2} + + @cached_property + def stereogenic_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: + """ + Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. + """ + stereo = {} + for path, env in self.stereogenic_cumulenes.items(): + if len(path) % 2: + continue + stereo[(path[0], path[-1])] = env + return stereo + + @cached_property + def ring_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: + """ + Tetrahedrons in rings, except ring-linkers. Values are non-ring atoms. + """ + out = {} + atoms_rings = self.atoms_rings + tetrahedrons = self.stereogenic_tetrahedrons + points = self.rings_linker_tetrahedrons + environment = self.not_special_connectivity + for n, r in atoms_rings.items(): + if n in tetrahedrons and n not in points: + out[n] = tuple(environment[n].difference(atoms_rings)) + return out + + @cached_property + def rings_linker_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: + """ + A dictionary where the keys are tetrahedron atoms shared between two rings (not condensed rings) and the values + are tuples representing their neighbors in the first and second rings respectively. + """ + out = {} + tetrahedrons = self.stereogenic_tetrahedrons + for n, r in self.atoms_rings.items(): + if n in tetrahedrons: + for nr, mr in combinations(r, 2): + if len(set(nr).intersection(mr)) == 1: + ni = nr.index(n) + mi = mr.index(n) + out[n] = (nr[ni - 1], nr[ni - len(nr) + 1], mr[mi - 1], mr[mi - len(mr) + 1]) + break + return out + + @cached_property + def ring_cumulenes_terminals(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: + """ + Terminal atoms of inside ring cumulenes. 
+ """ + out = set() + ar = self.atoms_rings + for n, *_, m in self.stereogenic_cumulenes: + if n in ar and m in ar and not set(ar[n]).isdisjoint(ar[m]): + out.add((n, m)) + return out + + @cached_property + def rings_linker_cumulenes_terminals(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: + """ + Terminal atoms of cumulenes connecting two rings. Values are neighbors in first and second rings. + """ + out = {} + ar = self.atoms_rings + chord = self.ring_cumulenes_terminals + for (n, *_, m), (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if n in ar and m in ar and (n, m) not in chord: + out[(n, m)] = (n1, n2, m1, m2) + return out + + @cached_property + def ring_attached_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Union[Tuple[int, int], Tuple[int]]]: + """ + Cumulenes attached to rings from one side. Values are out of ring neighbor atoms. + """ + ar = self.atoms_rings + out = {} + for (n, *_, m), (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if n in ar: + if m in ar: + continue + if m2: + out[(n, m)] = (m1, m2) + else: + out[(n, m)] = (m1,) + elif m in ar: + if n2: + out[(n, m)] = (n1, n2) + else: + out[(n, m)] = (n1,) + return out + + @property + def chiral_tetrahedrons(self) -> Set[int]: + """ + Chiral tetrahedrons except already labeled ones. + """ + return self.__chiral_centers[0] + + @property + def chiral_cis_trans(self) -> Set[Tuple[int, int]]: + """ + Chiral cis-trans bonds except already labeled ones. + """ + return self.__chiral_centers[1] + + @property + def chiral_allenes(self) -> Set[int]: + """ + Chiral allenes except already labeled ones. + """ + return self.__chiral_centers[2] + def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cache=True): """ Add stereo data by wedge notation of bonds. Use it for tetrahedrons of allenes. 
@@ -132,73 +391,78 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac :param m: number of atom to which wedge bond coming :param mark: up bond is 1, down is -1 """ - if n not in self._atoms: + atoms = self._atoms + if n not in atoms or m not in atoms or m not in self._bonds[n]: raise AtomNotFound - if n in self._atoms_stereo: + elif atoms[n].stereo is not None: raise IsChiral + elif c := self._stereo_allenes_centers.get(n): + # allenes + if atoms[c].stereo is not None: + raise IsChiral + elif c not in self.chiral_allenes: + raise NotChiral - plane = self._plane - if n in self._chiral_tetrahedrons: - if m not in self._bonds[n]: - raise AtomNotFound - th = self._stereo_tetrahedrons[n] - if self._atoms[m].atomic_number == 1: - s = _pyramid_sign((*plane[m], mark), *((*plane[x], 0) for x in th)) + t1, t2 = self._stereo_allenes_terminals[c] + order = self.stereogenic_allenes[c] + if atoms[m].atomic_number == 1: + if t1 == n: + m1 = order[1] + else: + t1, t2 = t2, t1 + m1 = order[0] + r = True else: - order = [(*plane[x], mark if x == m else 0) for x in th] + w = order.index(m) + if w == 0: + m1 = order[1] + r = False + elif w == 1: + m1 = order[0] + t1, t2 = t2, t1 + r = False + elif w == 2: + m1 = order[1] + r = True + else: + m1 = order[0] + t1, t2 = t2, t1 + r = True + if s := _allene_sign(mark, atoms[t1].xy, atoms[t2].xy, atoms[m1].xy): + atoms[c]._stereo = s < 0 if r else s > 0 + if clean_cache: + self.flush_cache(keep_sssr=True, keep_components=True) + # tetrahedrons + elif n in self.chiral_tetrahedrons: + th = self.stereogenic_tetrahedrons[n] + am = atoms[m] + if am.atomic_number == 1: + order = [] + for x in th: + ax = atoms[x] + order.append((ax.x, ax.y, 0)) + s = _pyramid_sign((am.x, am.y, mark), *order) + else: + order = [] + for x in th: + ax = atoms[x] + order.append((ax.x, ax.y, mark if x == m else 0)) if len(order) == 3: if len(self._bonds[n]) == 4: # explicit hydrogen x = next(x for x in self._bonds[n] if x not in th) - s 
= _pyramid_sign((*plane[x], 0), *order) + ax = atoms[x] + s = _pyramid_sign((ax.x, ax.y, 0), *order) else: - s = _pyramid_sign((*plane[n], 0), *order) + an = atoms[n] + s = _pyramid_sign((an.x, an.y, 0), *order) else: s = _pyramid_sign(order[-1], *order[:3]) if s: - self._atoms_stereo[n] = s > 0 + atoms[n]._stereo = s > 0 if clean_cache: - self.flush_cache() + self.flush_cache(keep_components=True, keep_sssr=True) else: - c = self._stereo_allenes_centers.get(n) - if c: - if c in self._allenes_stereo: - raise IsChiral - elif c not in self._chiral_allenes: - raise NotChiral - - t1, t2 = self._stereo_allenes_terminals[c] - order = self._stereo_allenes[c] - if self._atoms[m].atomic_number == 1: - if t1 == n: - m1 = order[1] - else: - t1, t2 = t2, t1 - m1 = order[0] - r = True - else: - w = order.index(m) - if w == 0: - m1 = order[1] - r = False - elif w == 1: - m1 = order[0] - t1, t2 = t2, t1 - r = False - elif w == 2: - m1 = order[1] - r = True - else: - m1 = order[0] - t1, t2 = t2, t1 - r = True - s = _allene_sign(mark, plane[t1], plane[t2], plane[m1]) - if s: - self._allenes_stereo[c] = s < 0 if r else s > 0 - if clean_cache: - self.flush_cache() - else: - # only tetrahedrons and allenes supported - raise NotChiral + raise NotChiral def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): """ @@ -206,11 +470,11 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): """ atoms = self._atoms flag = False - while self._chiral_cis_trans: + while self.chiral_cis_trans: stereo = False - for nm in self._chiral_cis_trans: + for nm in self.chiral_cis_trans: n, m = nm - n1, m1, *_ = self._stereo_cis_trans[nm] + n1, m1, *_ = self.stereogenic_cis_trans[nm] s = _cis_trans_sign(atoms[n1].xy, atoms[n].xy, atoms[m].xy, atoms[m1].xy) if s: stereo = True @@ -222,7 +486,7 @@ def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): else: break if flag and clean_cache: - self.flush_cache() + 
self.flush_cache(keep_components=True, keep_sssr=True) def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mark: bool, *, clean_cache=True): """ @@ -243,14 +507,14 @@ def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mar if not isinstance(mark, bool): raise TypeError('stereo mark should be bool') - if n in self._chiral_tetrahedrons: + if n in self.chiral_tetrahedrons: atom._stereo = self._translate_tetrahedron_sign(n, env, mark) if clean_cache: - self.flush_cache() - elif n in self._chiral_allenes: + self.flush_cache(keep_components=True, keep_sssr=True) + elif n in self.chiral_allenes: atom._stereo = self._translate_allene_sign(n, *env, mark) if clean_cache: - self.flush_cache() + self.flush_cache(keep_components=True, keep_sssr=True) else: # only tetrahedrons supported raise NotChiral @@ -281,14 +545,14 @@ def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: if self._bonds[i][j].stereo is not None: raise IsChiral - if (n, m) in self._chiral_cis_trans: - self._bonds[i][j] = self._translate_cis_trans_sign(n, m, n1, n2, mark) + if (n, m) in self.chiral_cis_trans: + self._bonds[i][j]._stereo = self._translate_cis_trans_sign(n, m, n1, n2, mark) if clean_cache: - self.flush_cache() - elif (m, n) in self._chiral_cis_trans: - self._bonds[i][j] = self._translate_cis_trans_sign(m, n, n2, n1, mark) + self.flush_cache(keep_components=True, keep_sssr=True) + elif (m, n) in self.chiral_cis_trans: + self._bonds[i][j]._stereo = self._translate_cis_trans_sign(m, n, n2, n1, mark) if clean_cache: - self.flush_cache() + self.flush_cache(keep_components=True, keep_sssr=True) else: raise NotChiral @@ -303,55 +567,58 @@ def fix_stereo(self: 'MoleculeContainer'): """ Reset stereo marks. 
""" - if self._atoms_stereo: # filter tetrahedrons - stereo_tetrahedrons = self._stereo_tetrahedrons - atoms_stereo = {k: v for k, v in self._atoms_stereo.items() if k in stereo_tetrahedrons} - self._atoms_stereo = self_atoms_stereo = {} - else: - atoms_stereo = {} - - if self._allenes_stereo: # filter allenes - stereo_allenes = self._stereo_allenes - allenes_stereo = {k: v for k, v in self._allenes_stereo.items() if k in stereo_allenes} - self._allenes_stereo = self_allenes_stereo = {} - else: - allenes_stereo = {} - - if self._cis_trans_stereo: # filter cis-trans - stereo_cis_trans = self._stereo_cis_trans - cis_trans_stereo = {k: v for k, v in self._cis_trans_stereo.items() if k in stereo_cis_trans} - self._cis_trans_stereo = self_stereo_cis_trans = {} - else: - cis_trans_stereo = {} + stereo_tetrahedrons = self.stereogenic_tetrahedrons + stereo_allenes = self.stereogenic_allenes + stereo_cis_trans = self._stereo_cis_trans_terminals + atoms_stereo = [] + allenes_stereo = [] + cis_trans_stereo = [] + for n, a in self._atoms.items(): + if a.stereo is None: + continue + elif n in stereo_tetrahedrons: + atoms_stereo.append((n, a, a.stereo)) + elif n in stereo_allenes: + allenes_stereo.append((n, a, a.stereo)) + a._stereo = None # flush stereo label + + for n, m, b in self.bonds(): + if b.stereo is None: + continue + elif ta := stereo_cis_trans.get(n): + cis_trans_stereo.append((ta, b, b.stereo)) + b._stereo = None # flush stereo label + self.flush_stereo_cache() old_stereo = len(atoms_stereo) + len(allenes_stereo) + len(cis_trans_stereo) while old_stereo: - chiral_tetrahedrons = self._chiral_tetrahedrons - chiral_allenes = self._chiral_allenes - chiral_cis_trans = self._chiral_cis_trans + chiral_tetrahedrons = self.chiral_tetrahedrons + chiral_allenes = self.chiral_allenes + chiral_cis_trans = self.chiral_cis_trans - tmp = {} - for n, s in atoms_stereo.items(): + # filter out resolved + tmp = [] + for n, a, s in atoms_stereo: if n in chiral_tetrahedrons: - 
self_atoms_stereo[n] = s + a._stereo = s # restore stereo else: - tmp[n] = s + tmp.append((n, a, s)) atoms_stereo = tmp - tmp = {} - for n, s in allenes_stereo.items(): + tmp = [] + for n, a, s in allenes_stereo: if n in chiral_allenes: - self_allenes_stereo[n] = s + a._stereo = s # restore stereo else: - tmp[n] = s + tmp.append((n, a, s)) allenes_stereo = tmp - tmp = {} - for n, s in cis_trans_stereo.items(): - if n in chiral_cis_trans: - self_stereo_cis_trans[n] = s + tmp = [] + for ta, b, s in cis_trans_stereo: + if ta in chiral_cis_trans: + b._stereo = s else: - tmp[n] = s + tmp.append((ta, b, s)) cis_trans_stereo = tmp fail_stereo = len(atoms_stereo) + len(allenes_stereo) + len(cis_trans_stereo) @@ -360,26 +627,236 @@ def fix_stereo(self: 'MoleculeContainer'): old_stereo = fail_stereo self.flush_stereo_cache() + @cached_property + def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. + """ + terminals = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = (path[i - 1], path[i]) + return terminals + + @cached_property + def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal and central atoms to terminal pair mapping. 
+ """ + terminals = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) + return terminals + + @cached_property + def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: + """ + Cis-Trans terminal atoms counterparts + """ + counterpart = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + counterpart[n] = m + counterpart[m] = n + return counterpart + + @cached_property + def _stereo_allenes_centers(self) -> Dict[int, int]: + """ + Allene terminal atom to center mapping + """ + terminals = {} + for c, (n, m) in self._stereo_allenes_terminals.items(): + terminals[n] = terminals[m] = c + return terminals + + @cached_property + def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Allene center atom to terminals mapping + """ + return {path[len(path) // 2]: (path[0], path[-1]) for path in self.stereogenic_cumulenes if len(path) % 2} + + def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): + """ + Get sign of chiral tetrahedron atom for specified neighbors order + + :param n: stereo atom + :param env: neighbors order + :param s: if None, use existing sign else translate given to molecule + """ + if s is None: + s = self._atoms[n].stereo + if s is None: + raise KeyError + + order = self.stereogenic_tetrahedrons[n] + if len(order) == 3: + if len(env) == 4: # hydrogen atom passed to env + # hydrogen always last in order + try: + order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme + except StopIteration: + raise KeyError + elif len(env) != 3: # pyramid or tetrahedron expected + raise ValueError('invalid atoms list') + elif len(env) not in (3, 4): # pyramid or tetrahedron expected + raise ValueError('invalid atoms list') + + translate = tuple(order.index(x) for x in env[:3]) + 
if _tetrahedron_translate[translate]: + return not s + return s + + def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): + """ + Get sign for specified opposite neighbors + + :param n: first double bonded atom + :param m: last double bonded atom + :param nn: neighbor of first atom + :param nm: neighbor of last atom + :param s: if None, use existing sign else translate given to molecule + """ + try: + n0, n1, n2, n3 = self.stereogenic_cis_trans[(n, m)] + except KeyError: + n0, n1, n2, n3 = self.stereogenic_cis_trans[(m, n)] + n, m = m, n # in alkenes sign not order depended + nn, nm = nm, nn + + if s is None: + i, j = self._stereo_cis_trans_centers[n] + s = self._bonds[i][j].stereo + if s is None: + raise KeyError + + if nn == n0: # same start + t0 = 0 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n1: + t0 = 1 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + t0 = 2 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + t0 = 3 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + else: + raise KeyError + + if _alkene_translate[(t0, t1)]: + return not s + return s + + def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): + """ + get sign for specified opposite neighbors + + :param c: central double bonded atom + :param nn: neighbor of first double bonded atom + :param nm: neighbor of last double bonded atom + :param s: if None, use existing sign else translate given to molecule + """ + if s is None: + s = self._atoms[c].stereo + if s is None: + raise KeyError + + 
n0, n1, n2, n3 = self.stereogenic_allenes[c] + if nn == n0: # same start + t0 = 0 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n1: + t0 = 1 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + t0 = 2 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + t1 = 3 + else: + raise KeyError + elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + t0 = 3 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + t1 = 2 + else: + raise KeyError + else: + raise KeyError + + if _alkene_translate[(t0, t1)]: + return not s + return s + @cached_property def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): - atoms_stereo = self._atoms_stereo - allenes_centers = self._stereo_allenes_centers atoms = self._atoms + overlap = set() space = [] solved = [] seen = set() - for n, s in self._allenes_stereo.items(): - env = self._stereo_allenes[n] + for n, env in self.stereogenic_allenes.items(): + if atoms[n].stereo is None: + continue term = self._stereo_allenes_terminals[n] + overlap.update(term) # don't allow incoming wedge to allenes terminals orders = [(*env[:2], *term, n, True), (*env[1::-1], *term[::-1], n, True)] if env[2]: orders.append((env[2], env[1], *term, n, True)) if env[3]: orders.append((env[3], env[0], *term[::-1], n, True)) space.append(orders) - for n, s in atoms_stereo.items(): - order = list(self._stereo_tetrahedrons[n]) + for n, env in self.stereogenic_tetrahedrons.items(): + if atoms[n].stereo is None: + continue + overlap.add(n) # don't allow incoming wedge to stereo tetrahedrons + order = list(env) orders = [(*order, n, False)] for _ in range(1, len(order)): order = order.copy() @@ -394,20 +871,22 @@ def 
_wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): good = [] if orders[0][-1]: for x in orders: - if (x0 := x[0]) in seen or x0 not in atoms_stereo and x0 not in allenes_centers: + x0 = x[0] + if x0 in seen or x0 not in overlap: good.append(x) seen.add(x[2]) if good: - solved.append(max(good, key=lambda x: (atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) + solved.append(max(good, key=lambda x: (not atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) else: unsolved.append(orders) else: for x in orders: - if (x0 := x[0]) in seen or x0 not in atoms_stereo and x0 not in allenes_centers: + x0 = x[0] + if x0 in seen or x0 not in overlap: good.append(x) if good: seen.add(x[-2]) - solved.append(max(good, key=lambda x: (atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) + solved.append(max(good, key=lambda x: (not atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) else: unsolved.append(orders) space = unsolved @@ -441,7 +920,7 @@ def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): def __wedge_sign(self: 'MoleculeContainer', order): if order[-1]: # allene - s = self._translate_allene_sign(order[-2], *order[:2]) + s = self._translate_allene_sign(order[-2], order[0], order[1]) v = _allene_sign(1, self._atoms[order[2]].xy, self._atoms[order[3]].xy, self._atoms[order[1]].xy) if not v: logger.info(f'need 2d clean. 
allenes wedge stereo ambiguous for atom {order[-2]}') @@ -453,16 +932,21 @@ def __wedge_sign(self: 'MoleculeContainer', order): n = order[-2] s = self._translate_tetrahedron_sign(n, order[:-2]) # need recalculation if XY changed + ao0 = self._atoms[order[0]] + ao1 = self._atoms[order[1]] + ao2 = self._atoms[order[2]] if len(order) == 5: - v = _pyramid_sign((*self._atoms[n].xy, 0), - (*self._atoms[order[0]].xy, 1), - (*self._atoms[order[1]].xy, 0), - (*self._atoms[order[2]].xy, 0)) + an = self._atoms[n] + v = _pyramid_sign((an.x, an.y, 0), + (ao0.x, ao0.y, 1), + (ao1.x, ao1.y, 0), + (ao2.x, ao2.y, 0)) else: - v = _pyramid_sign((*self._atoms[order[3]].xy, 0), - (*self._atoms[order[0]].xy, 1), - (*self._atoms[order[1]].xy, 0), - (*self._atoms[order[2]].xy, 0)) + ao3 = self._atoms[order[3]] + v = _pyramid_sign((ao3.x, ao3.y, 0), + (ao0.x, ao0.y, 1), + (ao1.x, ao1.y, 0), + (ao2.x, ao2.y, 0)) if not v: logger.info(f'need 2d clean. tetrahedron wedge stereo ambiguous for atom {n}') if s: @@ -470,18 +954,6 @@ def __wedge_sign(self: 'MoleculeContainer', order): else: return n, order[0], -v - @property - def _chiral_tetrahedrons(self) -> Set[int]: - return self.__chiral_centers[0] - - @property - def _chiral_cis_trans(self) -> Set[Tuple[int, int]]: - return self.__chiral_centers[1] - - @property - def _chiral_allenes(self) -> Set[int]: - return self.__chiral_centers[2] - @cached_property def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} @@ -516,99 +988,11 @@ def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[i morgan = _morgan(morgan, self.int_adjacency) return morgan - @cached_property - def _rings_tetrahedrons_linkers(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: - """ - Ring-linkers tetrahedrons. - - Values are neighbors in first and second rings. 
- """ - out = {} - tetrahedrons = self._stereo_tetrahedrons - for n, r in self.atoms_rings.items(): - if n in tetrahedrons: - for nr, mr in combinations(r, 2): - if len(set(nr).intersection(mr)) == 1: - ni = nr.index(n) - mi = mr.index(n) - out[n] = (nr[ni - 1], nr[ni - len(nr) + 1], mr[mi - 1], mr[mi - len(mr) + 1]) - break - return out - - @cached_property - def _rings_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: - """ - Tetrahedrons in rings, except ring-linkers. - - Values are out of ring atoms. - """ - out = {} - atoms_rings = self.atoms_rings - tetrahedrons = self._stereo_tetrahedrons - points = self._rings_tetrahedrons_linkers - environment = self.not_special_connectivity - for n, r in atoms_rings.items(): - if n in tetrahedrons and n not in points: - out[n] = tuple(environment[n].difference(atoms_rings)) - return out - - @cached_property - def _rings_cumulenes_linkers(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: - """ - Ring-linkers cumulenes except chords. - - Values are neighbors in first and second rings. - """ - out = {} - ar = self.atoms_rings - chord = self._rings_cumulenes - for (n, *_, m), (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if n in ar and m in ar and (n, m) not in chord: - out[(n, m)] = (n1, n2, m1, m2) - return out - - @cached_property - def _rings_cumulenes(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: - """ - Cumulenes in rings always chiral. - """ - out = set() - ar = self.atoms_rings - for n, *_, m in self._stereo_cumulenes: - if n in ar and m in ar and not set(ar[n]).isdisjoint(ar[m]): - out.add((n, m)) - return out - - @cached_property - def _rings_cumulenes_attached(self: 'MoleculeContainer') -> Dict[Tuple[int, int], - Union[Tuple[int, int], Tuple[int]]]: - """ - Cumulenes attached to rings. - - Values are out of ring atoms. 
- """ - ar = self.atoms_rings - out = {} - for (n, *_, m), (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if n in ar: - if m in ar: - continue - if m2: - out[(n, m)] = (m1, m2) - else: - out[(n, m)] = (m1,) - elif m in ar: - if n2: - out[(n, m)] = (n1, n2) - else: - out[(n, m)] = (n1,) - return out - @cached_property def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): atoms_rings = self.atoms_rings - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans allenes_centers = self._stereo_allenes_centers cis_trans_terminals = self._stereo_cis_trans_terminals cis_trans_centers = self._stereo_cis_trans_centers @@ -618,7 +1002,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): # tetrahedron is chiral if all its neighbors are unique. chiral_t = {n for n, env in tetrahedrons.items() if len({morgan[x] for x in env}) == len(env)} # tetrahedrons-linkers is chiral if in each rings neighbors are unique. - chiral_t.update(n for n, (n1, n2, m1, m2) in self._rings_tetrahedrons_linkers.items() + chiral_t.update(n for n, (n1, n2, m1, m2) in self.rings_linker_tetrahedrons.items() if morgan[n1] != morgan[n2] and morgan[m1] != morgan[m2]) # required for axes detection. @@ -630,7 +1014,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): # ring-linkers and rings-attached also takes into account. chiral_c = set() chiral_a = set() - for path, (n1, m1, n2, m2) in self._stereo_cumulenes.items(): + for path, (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): n, m = path[0], path[-1] if len(path) % 2: @@ -640,7 +1024,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): stereogenic.add(n) stereogenic.add(m) # ring cumulenes always chiral. can be already added. 
- for nm in self._rings_cumulenes: + for nm in self.ring_cumulenes_terminals: n, m = nm if any(len(x) < 8 for x in atoms_rings[n]): # skip small rings. if n in chiral_c: # remove already added small rings cumulenes. @@ -660,22 +1044,22 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): # find chiral axes. build graph of stereogenic atoms in rings. # atoms connected then located in same ring or cumulene. - for n, env in self._rings_tetrahedrons.items(): + for n, env in self.ring_tetrahedrons.items(): if len(env) == 2: # one or zero non-ring neighbors stereogenic. n1, n2 = env if morgan[n1] == morgan[n2]: # only unique non-ring members required. continue graph[n] = set() stereogenic.add(n) # non-linker tetrahedrons in rings - stereogenic. - for n, (n1, n2, m1, m2) in self._rings_tetrahedrons_linkers.items(): + for n, (n1, n2, m1, m2) in self.rings_linker_tetrahedrons.items(): graph[n] = set() if morgan[n1] != morgan[n2] or morgan[m1] != morgan[m2]: stereogenic.add(n) # linkers with at least one unsymmetric ring. - for n, m in self._rings_cumulenes_linkers: + for n, m in self.rings_linker_cumulenes_terminals: graph[n] = {m} graph[m] = {n} # stereogenic atoms already found. - for (n, m), env in self._rings_cumulenes_attached.items(): + for (n, m), env in self.ring_attached_cumulenes.items(): if len(env) == 2: n1, n2 = env if morgan[n1] == morgan[n2]: # only unique non-ring members required. 
@@ -729,9 +1113,9 @@ def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan atoms_stereo, cis_trans_stereo, allenes_stereo): bonds = self.int_adjacency - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes = self._stereo_allenes + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans + allenes = self.stereogenic_allenes translate_tetrahedron = self._translate_tetrahedron_sign translate_cis_trans = self._translate_cis_trans_sign diff --git a/chython/algorithms/stereo/__init__.py b/chython/algorithms/stereo/__init__.py deleted file mode 100644 index 18f784a7..00000000 --- a/chython/algorithms/stereo/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2021 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from .graph import * -from .molecule import * - - -__all__ = ['MoleculeStereo', 'Stereo'] diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py deleted file mode 100644 index 59523deb..00000000 --- a/chython/algorithms/stereo/graph.py +++ /dev/null @@ -1,467 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019-2024 Ramil Nugmanov -# This file is part of chython. 
-# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from collections import defaultdict -from functools import cached_property -from typing import Dict, Optional, Tuple, TYPE_CHECKING, Union - - -if TYPE_CHECKING: - from chython import MoleculeContainer, QueryContainer - Container = Union[MoleculeContainer, QueryContainer] - - -_heteroatoms = {5, 6, 7, 8, 14, 15, 16, 17, 33, 34, 35, 52, 53} - -# 1 2 -# \ | -# \| -# n---3 -# / -# / -# 0 -_tetrahedron_translate = {(0, 1, 2): False, (1, 2, 0): False, (2, 0, 1): False, - (0, 2, 1): True, (1, 0, 2): True, (2, 1, 0): True, - (0, 3, 1): False, (3, 1, 0): False, (1, 0, 3): False, - (0, 1, 3): True, (1, 3, 0): True, (3, 0, 1): True, - (0, 2, 3): False, (2, 3, 0): False, (3, 0, 2): False, - (0, 3, 2): True, (3, 2, 0): True, (2, 0, 3): True, - (1, 3, 2): False, (3, 2, 1): False, (2, 1, 3): False, - (1, 2, 3): True, (2, 3, 1): True, (3, 1, 2): True} -# 2 1 -# \ / -# n---m -# / \ -# 0 3 -_alkene_translate = {(0, 1): False, (1, 0): False, (0, 3): True, (3, 0): True, - (2, 3): False, (3, 2): False, (2, 1): True, (1, 2): True} - -# allowed atoms. these atoms have stable covalent bonds. -_organic_subset = {1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 52, 53, 85} - - -class Stereo: - __slots__ = () - - @cached_property - def cumulenes(self) -> Tuple[Tuple[int, ...], ...]: - """ - Alkenes, allenes and cumulenes atoms numbers. 
- """ - return tuple(self._cumulenes()) - - @cached_property - def tetrahedrons(self: 'Container') -> Tuple[int, ...]: - """ - Carbon sp3 atoms numbers. - """ - tetra = [] - for n, atom in self._atoms.items(): - if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: - env = self._bonds[n] - if all(int(x) == 1 for x in env.values()): - if sum(int(x) for x in env.values()) > 4: - continue - tetra.append(n) - return tuple(tetra) - - def clean_stereo(self: 'Container'): - """ - Remove stereo data. - """ - for a in self._atoms.values(): - a._stereo = None - for _, bs in self._bonds: - for b in bs.values(): - b._stereo = None # flush twice, but it should be still faster - self.flush_cache() - - def get_mapping(self: 'Container', other: 'Container', **kwargs): - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo - cis_trans_stereo = self._cis_trans_stereo - if atoms_stereo or allenes_stereo or cis_trans_stereo: - other_atoms_stereo = other._atoms_stereo - other_allenes_stereo = other._allenes_stereo - other_cis_trans_stereo = other._cis_trans_stereo - other_translate_tetrahedron_sign = other._translate_tetrahedron_sign - other_translate_allene_sign = other._translate_allene_sign - other_translate_cis_trans_sign = other._translate_cis_trans_sign - - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes = self._stereo_allenes - - for mapping in super().get_mapping(other, **kwargs): - for n, s in atoms_stereo.items(): - m = mapping[n] - if m not in other_atoms_stereo: # self stereo atom not stereo in other - break - # translate stereo mark in other in order of self tetrahedron - if other_translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) != s: - break - else: - for n, s in allenes_stereo.items(): - m = mapping[n] - if m not in other_allenes_stereo: # self stereo allene not stereo in other - break - # translate stereo mark in other in order of self allene - nn, nm, *_ = allenes[n] - if 
other_translate_allene_sign(m, mapping[nn], mapping[nm]) != s: - break - else: - for nm, s in cis_trans_stereo.items(): - n, m = nm - on, om = mapping[n], mapping[m] - if (on, om) not in other_cis_trans_stereo: - if (om, on) not in other_cis_trans_stereo: - break # self stereo cis_trans not stereo in other - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(om, on, mapping[nm], mapping[nn]) != s: - break - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(on, om, mapping[nn], mapping[nm]) != s: - break - else: - yield mapping - else: - yield from super().get_mapping(other, **kwargs) - - def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): - """ - Get sign of chiral tetrahedron atom for specified neighbors order - - :param n: stereo atom - :param env: neighbors order - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - s = self._atoms[n].stereo - if s is None: - raise KeyError - - order = self._stereo_tetrahedrons[n] - if len(order) == 3: - if len(env) == 4: # hydrogen atom passed to env - # hydrogen always last in order - try: - order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme - except StopIteration: - raise KeyError - elif len(env) != 3: # pyramid or tetrahedron expected - raise ValueError('invalid atoms list') - elif len(env) not in (3, 4): # pyramid or tetrahedron expected - raise ValueError('invalid atoms list') - - translate = tuple(order.index(x) for x in env[:3]) - if _tetrahedron_translate[translate]: - return not s - return s - - def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): - """ - Get sign for specified opposite neighbors - - :param n: first double bonded atom - :param m: last double bonded atom - :param nn: neighbor of first atom - :param nm: neighbor of last atom - :param s: if None, use existing sign else translate given to molecule - """ - try: - n0, n1, n2, n3 = 
self._stereo_cis_trans[(n, m)] - except KeyError: - n0, n1, n2, n3 = self._stereo_cis_trans[(m, n)] - n, m = m, n # in alkenes sign not order depended - nn, nm = nm, nn - - if s is None: - i, j = self._stereo_cis_trans_centers[n] - s = self._bonds[i][j].stereo - if s is None: - raise KeyError - - if nn == n0: # same start - t0 = 0 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n1: - t0 = 1 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: - t0 = 2 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: - t0 = 3 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - else: - raise KeyError - - if _alkene_translate[(t0, t1)]: - return not s - return s - - def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): - """ - get sign for specified opposite neighbors - - :param c: central double bonded atom - :param nn: neighbor of first double bonded atom - :param nm: neighbor of last double bonded atom - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - s = self._atoms[c].stereo - if s is None: - raise KeyError - - n0, n1, n2, n3 = self._stereo_allenes[c] - if nn == n0: # same start - t0 = 0 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n1: - t0 = 1 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: - t0 = 2 - if nm == n1: - t1 = 1 - 
elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: - t0 = 3 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - else: - raise KeyError - - if _alkene_translate[(t0, t1)]: - return not s - return s - - def _cumulenes(self: 'Container', heteroatoms=False): - atoms = self._atoms - bonds = self._bonds - - adj = defaultdict(set) # double bonds adjacency matrix - if heteroatoms: - for n, atom in atoms.items(): - if atom.atomic_number in _heteroatoms: - adj_n = adj[n].add - for m, bond in bonds[n].items(): - if int(bond) == 2 and atoms[m].atomic_number in _heteroatoms: - adj_n(m) - else: - for n, atom in atoms.items(): - if atom.atomic_number == 6: - adj_n = adj[n].add - for m, bond in bonds[n].items(): - if int(bond) == 2 and atoms[m].atomic_number == 6: - adj_n(m) - if not adj: - return () - - terminals = [x for x, y in adj.items() if len(y) == 1] - cumulenes = [] - while terminals: - n = terminals.pop(0) - m = adj[n].pop() - path = [n, m] - while m not in terminals: - adj_m = adj[m] - if len(adj_m) > 2: # not cumulene. SO3 etc. - cumulenes.extend(zip(path, path[1:])) # keep single double bonds. 
- break - adj_m.discard(n) - n, m = m, adj_m.pop() - path.append(m) - else: - terminals.remove(m) - adj[m].pop() - cumulenes.append(tuple(path)) - return cumulenes - - @cached_property - def _stereo_cumulenes(self: 'Container') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: - """ - Cumulenes which contains at least one non-hydrogen neighbor on both ends - """ - # 5 4 - # \ / - # 2---3 - # / \ - # 1 6 - bonds = self._bonds - atoms = self._atoms - cumulenes = {} - for path in self.cumulenes: - nf = bonds[path[0]] - nl = bonds[path[-1]] - n1, m1 = path[1], path[-2] - if any(b.order == 3 or atoms[m].atomic_number not in _organic_subset and b.order != 8 - for m, b in nf.items() if m != n1): - continue # skip X=C=C structures and metal-carbon complexes - if any(b.order == 3 or atoms[m].atomic_number not in _organic_subset and b.order != 8 - for m, b in nl.items() if m != m1): - continue # skip X=C=C structures and metal-carbon complexes - nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b.order != 8] - mn = [x for x, b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b.order != 8] - if nn and mn: - sn = nn[1] if len(nn) == 2 else None - sm = mn[1] if len(mn) == 2 else None - cumulenes[path] = (nn[0], mn[0], sn, sm) - return cumulenes - - @cached_property - def _stereo_tetrahedrons(self: 'Container') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, int]]]: - """ - Tetrahedrons which contains at least 3 non-hydrogen neighbors - """ - # 2 - # | - # 1--K--3 - # | - # 4? 
- atoms = self._atoms - bonds = self._bonds - tetrahedrons = {} - for n in self.tetrahedrons: - if any(atoms[x].atomic_number not in _organic_subset for x in bonds[n]): - continue # skip metal-carbon complexes - env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) - if len(env) in (3, 4): - tetrahedrons[n] = env - return tetrahedrons - - @cached_property - def _stereo_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: - """ - Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends - """ - stereo = {} - for path, env in self._stereo_cumulenes.items(): - if len(path) % 2: - continue - stereo[(path[0], path[-1])] = env - return stereo - - @cached_property - def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: - """ - Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. - """ - terminals = {} - for path in self._stereo_cumulenes: - if len(path) % 2: - continue - n, m = path[0], path[-1] - i = len(path) // 2 - terminals[n] = terminals[m] = (path[i - 1], path[i]) - return terminals - - @cached_property - def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: - """ - Cis-Trans terminal and central atoms to terminal pair mapping. 
- """ - terminals = {} - for path in self._stereo_cumulenes: - if len(path) % 2: - continue - n, m = path[0], path[-1] - i = len(path) // 2 - terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) - return terminals - - @cached_property - def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: - """ - Cis-Trans terminal atoms counterparts - """ - counterpart = {} - for path in self._stereo_cumulenes: - if len(path) % 2: - continue - n, m = path[0], path[-1] - counterpart[n] = m - counterpart[m] = n - return counterpart - - @cached_property - def _stereo_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: - """ - Allenes which contains at least one non-hydrogen neighbor on both ends - """ - return {path[len(path) // 2]: env for path, env in self._stereo_cumulenes.items() if len(path) % 2} - - @cached_property - def _stereo_allenes_centers(self) -> Dict[int, int]: - """ - Allene terminal atom to center mapping - """ - terminals = {} - for c, (n, m) in self._stereo_allenes_terminals.items(): - terminals[n] = terminals[m] = c - return terminals - - @cached_property - def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: - """ - Allene center atom to terminals mapping - """ - return {path[len(path) // 2]: (path[0], path[-1]) for path in self._stereo_cumulenes if len(path) % 2} - - -__all__ = ['Stereo'] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index fc2c7cb2..b7969687 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -285,7 +285,7 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} # check for full presence of cumulene chains and terminal attachments - for p in self._stereo_cumulenes.values(): + for p in self.stereogenic_cumulenes.values(): if not not_skin.issuperset(p): not_skin.difference_update(p) @@ -554,8 +554,6 @@ def unpack(cls, data: 
Union[bytes, memoryview], /, *, compressed=True, mol._allenes_stereo = allenes_stereo mol._cis_trans_stereo = cis_trans_stereo - mol._conformers = [] - mol._parsed_mapping = {} mol._MoleculeContainer__meta = None mol._MoleculeContainer__name = None mol._atoms = atoms = {} diff --git a/chython/containers/query.py b/chython/containers/query.py index 7a218786..757925f2 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -21,12 +21,11 @@ from .graph import Graph from ..algorithms.isomorphism import QueryIsomorphism from ..algorithms.smiles import QuerySmiles -from ..algorithms.stereo import Stereo from ..periodictable import Element, QueryElement from ..periodictable.base import Query -class QueryContainer(Stereo, Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): +class QueryContainer(Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): __slots__ = () def add_atom(self, atom: Union[Query, Element, int, str], *args, **kwargs): diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 82687724..61f2d6cd 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -175,8 +175,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if not stereo_atoms and not data['stereo_bonds']: return - st = molecule._stereo_tetrahedrons - sa = molecule._stereo_allenes + st = molecule.stereogenic_tetrahedrons + sa = molecule.stereogenic_allenes sat = molecule._stereo_allenes_terminals ctc = molecule._stereo_cis_trans_counterpart diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index a3504a0b..aaefb948 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -135,8 +135,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo or not data['stereo_atoms'] and not data['stereo_cumulenes'] and not data['stereo_allenes']: return - st = molecule._stereo_tetrahedrons - sa = 
molecule._stereo_allenes + st = molecule.stereogenic_tetrahedrons + sa = molecule.stereogenic_allenes ctc = molecule._stereo_cis_trans_counterpart stereo = [] diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py index 1d00a29b..70d1588e 100644 --- a/chython/periodictable/base/query.py +++ b/chython/periodictable/base/query.py @@ -21,10 +21,7 @@ from functools import cached_property from typing import Tuple, Type, List, Union, Optional from .element import Element - - -_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', - 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} +from .groups import GroupXVIII def _validate(value, prop): @@ -229,18 +226,15 @@ def atomic_symbol(self) -> str: return 'M' def __eq__(self, other): - if isinstance(other, Element): - if other.atomic_symbol in _inorganic: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - return True - # metal is subset of metal. 
only - return (isinstance(other, AnyMetal) - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization) + if not isinstance(other, Element): + return False + if other.is_forming_single_bonds or isinstance(other, GroupXVIII): + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + return True def __hash__(self): return hash((self.neighbors, self.hybridization)) @@ -257,35 +251,27 @@ def __eq__(self, other): """ Compare attached to molecules elements and query elements """ - if isinstance(other, Element): - if self.charge != other.charge: - return False - if self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if other.ring_sizes.isdisjoint(self.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected + if not isinstance(other, Element): + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + elif other.ring_sizes: # not in ring expected return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - # any is subset of any. 
only - return (isinstance(other, AnyElement) - and self.charge == other.charge - and self.is_radical == other.is_radical - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization - and self.ring_sizes == other.ring_sizes - and self.implicit_hydrogens == other.implicit_hydrogens - and self.heteroatoms == other.heteroatoms) + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True def __hash__(self): return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, @@ -329,42 +315,29 @@ def __eq__(self, other): """ Compare attached to molecules elements and query elements """ - if isinstance(other, Element): - if other.atomic_number not in self.atomic_numbers: - return False - if self.charge != other.charge: - return False - if self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if other.ring_sizes.isdisjoint(self.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected + if not isinstance(other, Element): + return False + if other.atomic_number not in self.atomic_numbers: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + elif other.ring_sizes: # not in ring expected return False - if self.heteroatoms and 
other.heteroatoms not in self.heteroatoms: - return False - return True - # List is subset of Any and List - elif (isinstance(other, (ListElement, AnyElement)) - and self.charge == other.charge - and self.is_radical == other.is_radical - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization - and self.ring_sizes == other.ring_sizes - and self.implicit_hydrogens == other.implicit_hydrogens - and self.heteroatoms == other.heteroatoms): - # list should contain all elements of other list - if isinstance(other, ListElement): - return set(self.atomic_numbers).issubset(other.atomic_numbers) - return True - return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True def __hash__(self): return hash((self.atomic_numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, @@ -475,47 +448,31 @@ def __eq__(self, other): """ compare attached to molecules elements and query elements """ - if isinstance(other, Element): - if self.atomic_number != other.atomic_number: - return False - if self.charge != other.charge: - return False - if self.is_radical != other.is_radical: - return False - if self.isotope and self.isotope != other.isotope: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if other.ring_sizes.isdisjoint(self.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected + if not isinstance(other, Element): + return False + if self.atomic_number != other.atomic_number: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.isotope and self.isotope != other.isotope: + return False + if self.neighbors and 
other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + elif other.ring_sizes: # not in ring expected return False - return True - elif (isinstance(other, ExtendedQuery) - and self.charge == other.charge - and self.is_radical == other.is_radical - and self.neighbors == other.neighbors - and self.hybridization == other.hybridization - and self.ring_sizes == other.ring_sizes - and self.implicit_hydrogens == other.implicit_hydrogens - and self.heteroatoms == other.heteroatoms): - # query element should fully match other query element - if isinstance(other, QueryElement): - return self.atomic_number == other.atomic_number and self.isotope == other.isotope - # query element is subset of any element - elif isinstance(other, AnyElement): - return True - # query element should be in list - return isinstance(other, ListElement) and self.atomic_number in other.atomic_numbers - return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True def __hash__(self): return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, diff --git a/chython/reactor/base.py b/chython/reactor/base.py index 073713e4..16f8b918 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. 
# @@ -206,52 +206,52 @@ def __set_stereo(self, new, structure, mapping): for n, s in products._atoms_stereo.items(): m = mapping[n] new._atoms_stereo[m] = products._translate_tetrahedron_sign(n, [r_mapping[x] for x in - new._stereo_tetrahedrons[m]], s) + new.stereogenic_tetrahedrons[m]], s) stereo_override.add(m) for n, s in products._allenes_stereo.items(): m = mapping[n] - t1, t2, *_ = new._stereo_allenes[m] + t1, t2, *_ = new.stereogenic_allenes[m] new._allenes_stereo[m] = products._translate_allene_sign(n, r_mapping[t1], r_mapping[t2], s) stereo_override.add(m) for (n, m), s in products._cis_trans_stereo.items(): nm = (mapping[n], mapping[m]) try: - t1, t2, *_ = new._stereo_cis_trans[nm] + t1, t2, *_ = new.stereogenic_cis_trans[nm] except KeyError: nm = nm[::-1] - t2, t1, *_ = new._stereo_cis_trans[nm] + t2, t1, *_ = new.stereogenic_cis_trans[nm] new._cis_trans_stereo[nm] = products._translate_cis_trans_sign(n, m, r_mapping[t1], r_mapping[t2], s) stereo_override.update(nm) # set unmatched part stereo and not overridden by patch. 
for n, s in structure._atoms_stereo.items(): - if n in stereo_override or n not in new._stereo_tetrahedrons or \ + if n in stereo_override or n not in new.stereogenic_tetrahedrons or \ new._bonds[n].keys() != structure._bonds[n].keys(): # skip atoms with changed neighbors continue - new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new._stereo_tetrahedrons[n], s) + new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new.stereogenic_tetrahedrons[n], s) for n, s in structure._allenes_stereo.items(): - if n in stereo_override or n not in new._stereo_allenes or \ - set(new._stereo_allenes[n]) != set(structure._stereo_allenes[n]): + if n in stereo_override or n not in new.stereogenic_allenes or \ + set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): # skip changed allenes continue - t1, t2, *_ = new._stereo_allenes[n] + t1, t2, *_ = new.stereogenic_allenes[n] new._allenes_stereo[n] = structure._translate_allene_sign(n, t1, t2, s) for nm, s in structure._cis_trans_stereo.items(): n, m = nm if n in stereo_override or m in stereo_override: continue - env = structure._stereo_cis_trans[nm] + env = structure.stereogenic_cis_trans[nm] try: - new_env = new._stereo_cis_trans[nm] + new_env = new.stereogenic_cis_trans[nm] except KeyError: nm = nm[::-1] try: - new_env = new._stereo_cis_trans[nm] + new_env = new.stereogenic_cis_trans[nm] except KeyError: continue t2, t1, *_ = new_env diff --git a/chython/utils/rdkit.py b/chython/utils/rdkit.py index 826387f6..bae12fd9 100644 --- a/chython/utils/rdkit.py +++ b/chython/utils/rdkit.py @@ -152,7 +152,7 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): for nm, s in data._cis_trans_stereo.items(): n, m = nm if m in bonds[n]: # cumulenes unsupported - nn, nm, *_ = data._stereo_cis_trans[nm] + nn, nm, *_ = data.stereogenic_cis_trans[nm] b = mol.GetBondBetweenAtoms(mapping[n], mapping[m]) b.SetStereoAtoms(mapping[nn], mapping[nm]) b.SetStereo(_cis if s else _trans) From 
1bf7a687b649fd5361c0c4ecff2c3ab69c400578 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:42:13 +0100 Subject: [PATCH 23/51] fixes --- chython/algorithms/isomorphism.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 8c0de0a5..65a20c0e 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -280,25 +280,25 @@ def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, # _cython - by default cython implementation enabled. # disable it by overriding method if Query Atoms or Containers logic changed. # Lv, Ts and Og in cython optimized mode treated as equal. - if isinstance(other, QueryIsomorphism): - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) - elif isinstance(other, MoleculeIsomorphism): - if _cython: - try: # windows? ;) - from ._isomorphism import get_mapping as _cython_get_mapping - except ImportError: - components = get_mapping = None - else: - components = self._cython_compiled_query # override to cython data + if not isinstance(other, MoleculeIsomorphism): + raise TypeError('MoleculeContainer expected') - def get_mapping(query, scope): - return _cython_get_mapping(*query, *other._cython_compiled_structure, - array('I', [n in scope for n in other])) - else: + if _cython: + try: # windows? 
;) + from ._isomorphism import get_mapping as _cython_get_mapping + except ImportError: components = get_mapping = None - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, - components=components, get_mapping=get_mapping) - raise TypeError('MoleculeContainer or QueryContainer expected') + else: + components = self._cython_compiled_query # override to cython data + + def get_mapping(query, scope): + return _cython_get_mapping(*query, *other._cython_compiled_structure, + array('I', [n in scope for n in other])) + else: + components = get_mapping = None + # todo: implement stereo + return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, + components=components, get_mapping=get_mapping) @cached_property def _cython_compiled_query(self): From faf88ac28f27ac7840471eba39f98510023e1773 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 22:56:43 +0100 Subject: [PATCH 24/51] smiles parser fixed --- chython/files/daylight/smiles.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 61f2d6cd..410df35a 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -175,6 +175,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if not stereo_atoms and not data['stereo_bonds']: return + atoms = molecule._atoms st = molecule.stereogenic_tetrahedrons sa = molecule.stereogenic_allenes sat = molecule._stereo_allenes_terminals @@ -182,10 +183,11 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): order = {mapping[n]: [mapping[m] for m in ms] for n, ms in data['order'].items()} + log = [] stereo = [] for i, s in stereo_atoms: n = mapping[i] - if not i and hydrogens[n]: # first atom in smiles has reversed chiral mark + if not i and atoms[n].implicit_hydrogens: # first atom in smiles has reversed chiral mark s = not s if 
n in st: @@ -196,6 +198,8 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): n1 = next(x for x in order[t1] if x in env) n2 = next(x for x in order[t2] if x in env) stereo.append((molecule.add_atom_stereo, n, (n1, n2), s)) + else: + log.append(f'non chiral atom {n} has stereo label in smiles') stereo_bonds = {mapping[n]: {mapping[m]: s for m, s in ms.items()} for n, ms in data['stereo_bonds'].items()} From b3fa72ece43fccfaf31e620df92909679a7a23f5 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 11 Nov 2024 23:08:47 +0100 Subject: [PATCH 25/51] smiles generator fixed --- chython/algorithms/smiles.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index bbd43dfa..4f2e14ae 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -448,7 +448,7 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): smi[2] = atom.atomic_symbol return ''.join(smi) - def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): + def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' order = self._bonds[n][m].order @@ -475,14 +475,14 @@ def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): else: # order == 8 return '~' - def __ct_map(self, adjacency): + def __ct_map(self: 'MoleculeContainer', adjacency): stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} if not stereo_bonds: return {} ct_map = {} + sct = self.stereogenic_cis_trans ctc = self._stereo_cis_trans_centers ctt = self._stereo_cis_trans_terminals - sct = self._stereo_cis_trans ctcp = self._stereo_cis_trans_counterpart seen = set() From 57ef18d0d277992e9cb57c48a3602626021b5b46 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 11:02:03 +0100 Subject: [PATCH 26/51] morgan and rings refactored. 
no need for queries. dropped. --- chython/algorithms/isomorphism.py | 66 +++++++++++++++---------------- chython/algorithms/morgan.py | 6 +-- chython/algorithms/rings.py | 8 ++-- chython/algorithms/smiles.py | 14 +++++-- chython/containers/graph.py | 29 ++------------ chython/containers/molecule.py | 30 ++++++++++++-- 6 files changed, 82 insertions(+), 71 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 65a20c0e..a6ddea3e 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: from chython.containers.graph import Graph - from chython.containers import MoleculeContainer, QueryContainer + from chython.containers import MoleculeContainer class Isomorphism: @@ -49,14 +49,6 @@ def __gt__(self, other): def __ge__(self, other): return other.is_substructure(self) - def __contains__(self: 'Graph', other: Union[Element, Query, str]): - """ - Atom in Structure test. - """ - if isinstance(other, str): - return any(other == x.atomic_symbol for x in self._atoms.values()) - return any(other == x for x in self._atoms.values()) - def is_substructure(self, other, /) -> bool: """ Test self is substructure of other @@ -79,23 +71,7 @@ def is_equal(self, other, /) -> bool: return False return True - def is_automorphic(self): - """ - Test for automorphism symmetry of graph. - """ - try: - next(self.get_automorphism_mapping()) - except StopIteration: - return False - return True - - def get_automorphism_mapping(self: 'Graph') -> Iterator[Dict[int, int]]: - """ - Iterator of all possible automorphism mappings. 
- """ - return _get_automorphism_mapping(self.atoms_order, self._bonds) - - def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=None, + def _get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter=True, searching_scope=None, components=None, get_mapping=None) -> Iterator[Dict[int, int]]: if components is None: # ad-hoc for QueryContainer components, closures = self._compiled_query @@ -141,14 +117,36 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No @cached_property def _compiled_query(self: 'Graph'): - components, closures = _compile_query(self._atoms, self._bonds) - if self.connected_components_count > 1: - order = {x: n for n, c in enumerate(self.connected_components) for x in c} - components.sort(key=lambda x: order[x[0][0]]) - return components, closures + return _compile_query(self._atoms, self._bonds) class MoleculeIsomorphism(Isomorphism): + __slots__ = () + + def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): + """ + Atom in Structure test. + """ + if isinstance(other, str): + return any(other == x.atomic_symbol for x in self._atoms.values()) + return any(other == x for x in self._atoms.values()) + + def is_automorphic(self): + """ + Test for automorphism symmetry of graph. + """ + try: + next(self.get_automorphism_mapping()) + except StopIteration: + return False + return True + + def get_automorphism_mapping(self: 'MoleculeContainer') -> Iterator[Dict[int, int]]: + """ + Iterator of all possible automorphism mappings. 
+ """ + return _get_automorphism_mapping(self.atoms_order, self._bonds) + def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None): """ @@ -163,7 +161,7 @@ def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: boo raise TypeError('MoleculeContainer expected') @cached_property - def _cython_compiled_structure(self): + def _cython_compiled_structure(self: 'MoleculeContainer'): # long I: # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit @@ -268,7 +266,9 @@ def _cython_compiled_structure(self): class QueryIsomorphism(Isomorphism): - def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, automorphism_filter: bool = True, + __slots__ = () + + def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None, _cython=True): """ Get Query to Molecule substructure mapping generator. diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index 36086ada..e200cbc3 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -27,14 +27,14 @@ if TYPE_CHECKING: - from chython.containers.graph import Graph + from chython.containers import MoleculeContainer class Morgan: __slots__ = () @cached_property - def atoms_order(self: 'Graph') -> Dict[int, int]: + def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: """ Morgan like algorithm for graph nodes ordering @@ -48,7 +48,7 @@ def atoms_order(self: 'Graph') -> Dict[int, int]: return _morgan({n: hash((hash(a), n in ring)) for n, a in self._atoms.items()}, self.int_adjacency) @cached_property - def int_adjacency(self: 'Graph') -> Dict[int, Dict[int, int]]: + def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: """ Adjacency with integer-coded bonds. 
""" diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 37cde6dc..4871d5fa 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: - from chython.containers.graph import Graph + from chython.containers import MoleculeContainer class Rings: @@ -111,7 +111,7 @@ def rings_count(self) -> int: return sum(len(x) for x in bonds.values()) // 2 - len(bonds) + len(_connected_components(bonds)) @cached_property - def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: + def not_special_connectivity(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ Graph connectivity without special bonds. """ @@ -124,7 +124,7 @@ def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: return bonds @cached_property - def connected_components(self: 'Graph') -> List[Set[int]]: + def connected_components(self: 'MoleculeContainer') -> List[Set[int]]: """ Isolated components of single graph. E.g. salts as ion pair. """ @@ -138,7 +138,7 @@ def connected_components_count(self) -> int: return len(self.connected_components) @cached_property - def skin_graph(self: 'Graph') -> Dict[int, Set[int]]: + def skin_graph(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ Graph without terminal atoms. Only rings and linkers """ diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 4f2e14ae..fc0e7d01 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -322,8 +322,9 @@ def _format_atom(self, n, adjacency, **kwargs): def _format_bond(self, n, m, adjacency, **kwargs): ... - def _smiles_order(self: 'Graph', stereo=True) -> Callable: - return self.atoms_order.__getitem__ + @abstractmethod + def _smiles_order(self, stereo=True) -> Callable: + ... def _format_cxsmiles(self, order) -> Optional[str]: ... 
@@ -375,7 +376,7 @@ def sticky_smiles(self: Union['MoleculeContainer', 'MoleculeSmiles'], left: int, smiles = smiles[2:] return ''.join(smiles) - def _smiles_order(self: 'MoleculeContainer', stereo=True) -> Callable: + def _smiles_order(self: 'MoleculeContainer', stereo=True): if stereo: return self._chiral_morgan.__getitem__ else: @@ -527,6 +528,9 @@ def __ct_map(self: 'MoleculeContainer', adjacency): class CGRSmiles(Smiles): __slots__ = () + def _smiles_order(self: 'CGRContainer', stereo=True): + return self.atoms_order.__getitem__ + def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): atom = self._atoms[n] if atom.isotope: @@ -552,6 +556,10 @@ def _format_bond(self: 'CGRContainer', n, m, adjacency, **kwargs): class QuerySmiles(Smiles): __slots__ = () + def _smiles_order(self: 'QueryContainer', stereo=True): + # try to keep atoms order + return {n: i for i, n in enumerate(self._atoms)}.__getitem__ + def _format_cxsmiles(self: 'QueryContainer', order): hh = ['atomProp'] cx = [] diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 7fa5dead..4586969e 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -19,8 +19,6 @@ from abc import ABC, abstractmethod from functools import cached_property from typing import Dict, Generic, Iterator, Optional, Tuple, TypeVar -from ..algorithms.morgan import Morgan -from ..algorithms.rings import Rings from ..exceptions import AtomNotFound, MappingError, BondNotFound @@ -28,7 +26,7 @@ Bond = TypeVar('Bond') -class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): +class Graph(Generic[Atom, Bond], ABC): __slots__ = ('_atoms', '_bonds', '__dict__') __class_cache__ = {} @@ -101,7 +99,7 @@ def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: self._atoms[n] = atom self._bonds[n] = {} - self.flush_cache(keep_sssr=True) + self.flush_cache() return n @abstractmethod @@ -169,27 +167,8 @@ def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): 
u._bonds.update(other._bonds) return u - def flush_cache(self, *, keep_sssr=False, keep_components=False): - backup = {} - if keep_sssr: - # good to keep if no new bonds or bonds deletions or bonds to/from any change - if 'sssr' in self.__dict__: - backup['sssr'] = self.sssr - if 'atoms_rings' in self.__dict__: - backup['atoms_rings'] = self.atoms_rings - if 'atoms_rings_sizes' in self.__dict__: - backup['atoms_rings_sizes'] = self.atoms_rings_sizes - if 'ring_atoms' in self.__dict__: - backup['ring_atoms'] = self.ring_atoms - if 'not_special_connectivity' in self.__dict__: - backup['not_special_connectivity'] = self.not_special_connectivity - if 'rings_count' in self.__dict__: - backup['rings_count'] = self.rings_count - if keep_components: - # good to keep if no new bonds or bonds deletions - if 'connected_components' in self.__dict__: - backup['connected_components'] = self.connected_components - self.__dict__ = backup + def flush_cache(self): + self.__dict__.clear() def __copy__(self): return self.copy() diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index b7969687..f80a453d 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -32,6 +32,8 @@ from ..algorithms.isomorphism import MoleculeIsomorphism from ..algorithms.fingerprints import Fingerprints from ..algorithms.mcs import MCS +from ..algorithms.morgan import Morgan +from ..algorithms.rings import Rings from ..algorithms.smiles import MoleculeSmiles from ..algorithms.standardize import StandardizeMolecule from ..algorithms.stereo import MoleculeStereo @@ -41,9 +43,9 @@ from ..periodictable import DynamicElement, Element, QueryElement, H -class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, - MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, - X3domMolecule): +class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], Morgan, Rings, MoleculeIsomorphism, 
+ Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, + Fingerprints, Tautomers, MCS, X3domMolecule): __slots__ = ('_meta', '_name', '_conformers', '_changed', '_backup') def __init__(self): @@ -823,6 +825,28 @@ def check_implicit(self, n: int, h: int) -> bool: return True return False + def flush_cache(self, *, keep_sssr=False, keep_components=False): + backup = {} + if keep_sssr: + # good to keep if no new bonds or bonds deletions or bonds to/from any change + if 'sssr' in self.__dict__: + backup['sssr'] = self.sssr + if 'atoms_rings' in self.__dict__: + backup['atoms_rings'] = self.atoms_rings + if 'atoms_rings_sizes' in self.__dict__: + backup['atoms_rings_sizes'] = self.atoms_rings_sizes + if 'ring_atoms' in self.__dict__: + backup['ring_atoms'] = self.ring_atoms + if 'not_special_connectivity' in self.__dict__: + backup['not_special_connectivity'] = self.not_special_connectivity + if 'rings_count' in self.__dict__: + backup['rings_count'] = self.rings_count + if keep_components: + # good to keep if no new bonds or bonds deletions + if 'connected_components' in self.__dict__: + backup['connected_components'] = self.connected_components + self.__dict__ = backup + def __int__(self): """ Total charge of molecule From 38e0bd1a409e50e09679bf6e0eae984303d246eb Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 11:16:55 +0100 Subject: [PATCH 27/51] optimizations added --- chython/containers/molecule.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index f80a453d..d56c122d 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -237,13 +237,22 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): self.fix_structure() self.fix_stereo() - def copy(self) -> 'MoleculeContainer': + def copy(self, *, keep_sssr=False, keep_components=False) -> 'MoleculeContainer': 
copy = super().copy() copy._name = self._name if self._meta is None: copy._meta = None else: copy._meta = self._meta.copy() + + if keep_sssr: + for k, v in self.__dict__.items(): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', + 'ring_atoms', 'not_special_connectivity', 'rings_count'): + copy.__dict__[k] = v + if keep_components: + if 'connected_components' in self.__dict__: + copy.__dict__['connected_components'] = self.connected_components return copy def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = True) -> 'MoleculeContainer': @@ -829,18 +838,10 @@ def flush_cache(self, *, keep_sssr=False, keep_components=False): backup = {} if keep_sssr: # good to keep if no new bonds or bonds deletions or bonds to/from any change - if 'sssr' in self.__dict__: - backup['sssr'] = self.sssr - if 'atoms_rings' in self.__dict__: - backup['atoms_rings'] = self.atoms_rings - if 'atoms_rings_sizes' in self.__dict__: - backup['atoms_rings_sizes'] = self.atoms_rings_sizes - if 'ring_atoms' in self.__dict__: - backup['ring_atoms'] = self.ring_atoms - if 'not_special_connectivity' in self.__dict__: - backup['not_special_connectivity'] = self.not_special_connectivity - if 'rings_count' in self.__dict__: - backup['rings_count'] = self.rings_count + for k, v in self.__dict__.items(): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', + 'ring_atoms', 'not_special_connectivity', 'rings_count'): + backup[k] = v if keep_components: # good to keep if no new bonds or bonds deletions if 'connected_components' in self.__dict__: @@ -884,7 +885,7 @@ def __enter__(self): """ Transaction of changes. Keep current state for restoring on errors. 
""" - self._backup = self.copy() + self._backup = self.copy(keep_sssr=True, keep_components=True) return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -894,7 +895,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self._bonds = backup._bonds self._meta = backup._meta self._name = backup._name - self.flush_cache() + self.__dict__ = backup.__dict__ else: # update internal state self.fix_structure() self.fix_stereo() From bdef5809ff6ec805b5f08637d2e7d6ca560d04a1 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 11:37:46 +0100 Subject: [PATCH 28/51] tautomers refactored --- chython/algorithms/aromatics/kekule.py | 2 +- chython/algorithms/tautomers/__init__.py | 104 +++-------------- chython/algorithms/tautomers/acid_base.py | 111 +++++++++++-------- chython/algorithms/tautomers/heteroarenes.py | 22 ++-- chython/algorithms/tautomers/keto_enol.py | 35 +++--- 5 files changed, 108 insertions(+), 166 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index f7d90918..6848638c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -62,7 +62,7 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): """ self.__fix_rings() # fix bad aromatic rings for form in self.__kekule_full(0): - copy = self.copy() + copy = self.copy(keep_sssr=True, keep_components=True) bonds = copy._bonds atoms = set() for n, m, b in form: diff --git a/chython/algorithms/tautomers/__init__.py b/chython/algorithms/tautomers/__init__.py index 7a628c6d..e180eaef 100644 --- a/chython/algorithms/tautomers/__init__.py +++ b/chython/algorithms/tautomers/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # Copyright 2020 Nail Samikaev # This file is part of chython. 
# @@ -51,47 +51,25 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar """ if limit < 1: raise ValueError('limit should be greater or equal 1') - - has_stereo = bool(self._atoms_stereo or self._allenes_stereo or self._cis_trans_stereo) counter = 0 - copy = self.copy() - copy.clean_stereo() - # sssr, neighbors and heteroatoms are same for all tautomers. - # prevent recalculation by sharing cache. - self.__set_cache(copy) + copy = self.copy(keep_sssr=True, keep_components=True) if prepare_molecules: # transform to kekule form without hydrogens - k = copy.kekule() - i = copy.implicify_hydrogens(_fix_stereo=False) - if k or i: # reset cache after flush - self.__set_cache(copy) - - thiele = copy.copy() # transform to thiele to prevent duplicates and dearomatization - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): - self.__set_cache(thiele) - - # return origin structure as first tautomer - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + copy.kekule() + copy.implicify_hydrogens(_fix_stereo=False) + + # transform to thiele to prevent duplicates and dearomatization + thiele = copy.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) + yield thiele # return original structure as first tautomer seen = {thiele: None} # value is parent molecule - required for preventing migrations in sugars. 
# first try to neutralize if copy.neutralize(_fix_stereo=False): # found neutral form - thiele = copy.copy() - self.__set_cache(copy) # restore cache - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): - self.__set_cache(thiele) - - # return found neutral form - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + thiele = copy.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) + yield thiele counter += 1 seen[thiele] = None @@ -107,11 +85,8 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current, thiele_current = queue.popleft() for mol, ket in current._enumerate_keto_enol_tautomers(partial): - thiele = mol.copy() - self.__set_cache(mol) - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): # reset cache after flush_cache. - self.__set_cache(thiele) + thiele = mol.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) if thiele not in seen: seen[thiele] = current @@ -124,10 +99,7 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar queue = deque([(mol, thiele)]) new_queue = [thiele] copy = mol # new entry point. - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + yield thiele break if keep_sugars and current is not copy and ket: # prevent carbonyl migration in sugars. skip entry point. 
@@ -138,10 +110,7 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar queue.append((mol, thiele)) new_queue.append(thiele) - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + yield thiele counter += 1 if counter == limit: return @@ -152,15 +121,11 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current = queue.popleft() for mol in current._enumerate_hetero_arene_tautomers(): - self.__set_cache(mol) if mol not in seen: seen[mol] = None queue.append(mol) new_queue.append(mol) # new hetero-arenes also should be included to this list. - if has_stereo: - yield self.__set_stereo(mol.copy()) - else: - yield mol + yield mol counter += 1 if counter == limit: return @@ -171,14 +136,10 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current = queue.popleft() for mol in current._enumerate_zwitter_tautomers(): - self.__set_cache(mol) if mol not in seen: seen[mol] = None queue.append(mol) - if has_stereo: - yield self.__set_stereo(mol.copy()) - else: - yield mol + yield mol counter += 1 if counter == limit: return @@ -206,34 +167,5 @@ def enumerate_charged_tautomers(self: 'MoleculeContainer', *, prepare_molecules= if count == limit: return - def __set_cache(self: 'MoleculeContainer', mol): - try: - neighbors = self.__dict__['__cached_args_method_neighbors'] - except KeyError: - neighbors = self.__dict__['__cached_args_method_neighbors'] = {} - try: - heteroatoms = self.__dict__['__cached_args_method_heteroatoms'] - except KeyError: - heteroatoms = self.__dict__['__cached_args_method_heteroatoms'] = {} - try: - is_ring_bond = self.__dict__['__cached_args_method_is_ring_bond'] - except KeyError: - is_ring_bond = self.__dict__['__cached_args_method_is_ring_bond'] = {} - - mol.__dict__['sssr'] = self.sssr # thiele/kekule - mol.__dict__['ring_atoms'] = self.ring_atoms # morgan - mol.__dict__['_connected_components'] = 
self._connected_components # isomorphism - mol.__dict__['atoms_rings_sizes'] = self.atoms_rings_sizes # isomorphism - mol.__dict__['__cached_args_method_neighbors'] = neighbors # isomorphism - mol.__dict__['__cached_args_method_heteroatoms'] = heteroatoms # isomorphism - mol.__dict__['__cached_args_method_is_ring_bond'] = is_ring_bond # isomorphism - - def __set_stereo(self: 'MoleculeContainer', mol): - mol._atoms_stereo.update(self._atoms_stereo) - mol._allenes_stereo.update(self._allenes_stereo) - mol._cis_trans_stereo.update(self._cis_trans_stereo) - mol.fix_stereo() - return mol - __all__ = ['Tautomers'] diff --git a/chython/algorithms/tautomers/acid_base.py b/chython/algorithms/tautomers/acid_base.py index bb1a672f..c901cbcd 100644 --- a/chython/algorithms/tautomers/acid_base.py +++ b/chython/algorithms/tautomers/acid_base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -44,9 +44,8 @@ def neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, return [] return False - self._charges.update(mol._charges) - self._hydrogens.update(mol._hydrogens) - self.flush_cache() + self._atoms.update(mol._atoms) + self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() if logging: @@ -85,14 +84,16 @@ def enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: continue uniq.add(dc) seen_combo.add((dc, ac)) - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) for n in ac: - mol._hydrogens[n] += 1 - mol._charges[n] += 1 + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for n in dc: if n is not None: - mol._hydrogens[n] -= 1 - mol._charges[n] -= 1 + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 if mol not in seen: seen.add(mol) yield mol @@ -109,15 +110,17 @@ def 
enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: uniq.add(ac) if (dc, ac) in seen_combo: continue - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) for n in ac: if n is not None: - mol._hydrogens[n] += 1 - mol._charges[n] += 1 + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for n in dc: if n is not None: - mol._hydrogens[n] -= 1 - mol._charges[n] -= 1 + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 if mol not in seen: seen.add(mol) yield mol @@ -139,44 +142,52 @@ def _neutralize(self: 'MoleculeContainer', keep_charge=True): if not donors or not acceptors: return # neutralization impossible elif len(donors) > len(acceptors): - copy = self.copy() - for a in acceptors: - copy._hydrogens[a] += 1 - copy._charges[a] += 1 + copy = self.copy(keep_sssr=True, keep_components=True) + for n in acceptors: + a = copy._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for c in combinations(donors, len(acceptors)): - mol = copy.copy() - for d in c: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 + mol = copy.copy(keep_sssr=True, keep_components=True) + for n in c: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 yield mol, acceptors.union(c) elif len(donors) < len(acceptors): - copy = self.copy() - for d in donors: - copy._hydrogens[d] -= 1 - copy._charges[d] -= 1 + copy = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = copy._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 for c in combinations(acceptors, len(donors)): - mol = copy.copy() - for a in c: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = copy.copy(keep_sssr=True, keep_components=True) + for n in c: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors.union(c) else: # balanced! 
- mol = self.copy() - for d in donors: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 - for a in acceptors: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 + for n in acceptors: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors | acceptors elif donors or acceptors: - mol = self.copy() - for d in donors: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 - for a in acceptors: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 + for n in acceptors: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors | acceptors def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): @@ -190,11 +201,13 @@ def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): acceptors.add(mapping[1]) for d, a in product(donors, acceptors): - mol = self.copy() - mol._hydrogens[d] -= 1 - mol._hydrogens[a] += 1 - mol._charges[d] -= 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + d = mol._atoms[d] + a = mol._atoms[a] + d._implicit_hydrogens -= 1 + a._implicit_hydrogens += 1 + d._charge -= 1 + a._charge += 1 yield mol diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 81837438..3e6ac345 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -33,9 +33,6 @@ class HeteroArenes: def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds - hydrogens = self._hydrogens - charges = self._charges - radicals = self._radicals rings = defaultdict(list) # aromatic skeleton for n, m_bond in bonds.items(): @@ -49,19 +46,20 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): donors = set() single_bonded = set() for n, ms in rings.items(): + a = atoms[n] if len(ms) == 2: - if atoms[n].atomic_number in (5, 7, 15): - if not charges[n] and not radicals[n]: + if a.atomic_number in (5, 7, 15): + if not a.charge and not a.is_radical: # only neutral B, N, P - if hydrogens[n]: # pyrrole + if a.implicit_hydrogens: # pyrrole donors.add(n) elif len(bonds[n]) == 2: # pyridine acceptors.add(n) else: single_bonded.add(n) - elif charges[n] == -1 and atoms[n].atomic_number == 6: # ferrocene + elif a.charge == -1 and a.atomic_number == 6: # ferrocene single_bonded.add(n) - elif len(ms) == 3 and atoms[n].atomic_number in (5, 7, 15) and not charges[n] and not radicals[n]: + elif len(ms) == 3 and a.atomic_number in (5, 7, 15) and not a.charge and not a.is_radical: single_bonded.add(n) if not donors or not acceptors: return @@ -94,9 +92,9 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): next(_kekule_component(component, sb, (), 0)) except InvalidAromaticRing: continue - mol = self.copy() - mol._hydrogens[d] = 0 - mol._hydrogens[a] = 1 + mol = self.copy(keep_sssr=True, keep_components=True) + mol._atoms[d]._implicit_hydrogens = 0 + mol._atoms[a]._implicit_hydrogens = 1 yield mol diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index acad2241..f9fd582b 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 
2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -39,13 +39,13 @@ def _enumerate_keto_enol_tautomers(self: Union['MoleculeContainer', 'KetoEnol'], a = fix[0][0] d = fix[-1][1] - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) m_bonds = mol._bonds for n, m, b in fix: - m_bonds[n][m]._Bond__order = b + m_bonds[n][m]._order = b - mol._hydrogens[a] += 1 - mol._hydrogens[d] -= 1 + mol._atoms[a]._implicit_hydrogens += 1 + mol._atoms[d]._implicit_hydrogens -= 1 yield mol, ket @cached_property @@ -59,8 +59,6 @@ def _sugar_groups(self): def __enumerate_bonds(self: 'MoleculeContainer', partial): atoms = self._atoms bonds = self._bonds - hydrogens = self._hydrogens - hybridization = self.hybridization rings = self.atoms_rings_sizes # search neutral oxygen and nitrogen @@ -83,11 +81,12 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): if partial and path and not len(path) % 2 and \ (hydrogen or # enol > ketone - hydrogens[(x := path[-1][1])] and (x not in rings or all(x > 7 for x in rings[x]))): # ketone> + atoms[(x := path[-1][1])].implicit_hydrogens and + (x not in rings or all(x > 7 for x in rings[x]))): # ketone> # return partial hops. ignore allenes in small rings. yield path, hydrogen if len(path) > depth: # fork found - if not partial and not len(path) % 2 and (hydrogen or hydrogens[path[-1][1]]): + if not partial and not len(path) % 2 and (hydrogen or atoms[path[-1][1]].implicit_hydrogens): # end of path found. return it and start new one. 
yield path, hydrogen seen.difference_update(x for _, x, _ in path[depth:]) @@ -110,32 +109,32 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): continue elif n in anti: # enol-ketone switch if current in anti[n]: - if hydrogens: - if b.order == 2: + if hydrogen: + if b == 2: cp = path.copy() cp.append((current, n, 1)) yield cp, True - elif b.order == 1: + elif b == 1: cp = path.copy() cp.append((current, n, 2)) yield cp, False - elif b.order == bond and atoms[n].atomic_number == 6: # classic keto-enol route - hb = hybridization(n) - if hb == 2: # grow up + elif b.order == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route + if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: - if hb == 3: # OC=CC=C=C case + if a.hybridization == 3: # OC=CC=C=C case cp = path.copy() cp.append((current, n, 1)) yield cp, True # ketone found - elif hb == 1 and hydrogens[n]: # ketone >> enol + elif a.hybridization == 1 and a.implicit_hydrogens: # ketone >> enol cp = path.copy() cp.append((current, n, 2)) yield cp, False if path and not len(path) % 2 and \ (hydrogen or # enol > ketone - hydrogens[(x := path[-1][1])] and (x not in rings or all(x > 7 for x in rings[x]))): + atoms[(x := path[-1][1])].implicit_hydrogens and + (x not in rings or all(x > 7 for x in rings[x]))): yield path, hydrogen From 109c8de189a2af6ac5c423a73fc232a34cb7b54b Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 16:36:08 +0100 Subject: [PATCH 29/51] fixes --- chython/algorithms/tautomers/acid_base.py | 2 +- chython/algorithms/tautomers/heteroarenes.py | 2 +- chython/algorithms/tautomers/keto_enol.py | 23 +++++++++++--------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/chython/algorithms/tautomers/acid_base.py b/chython/algorithms/tautomers/acid_base.py index c901cbcd..4323b0c8 100644 --- a/chython/algorithms/tautomers/acid_base.py +++ b/chython/algorithms/tautomers/acid_base.py @@ -44,7 +44,7 @@ def 
neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, return [] return False - self._atoms.update(mol._atoms) + self._atoms = mol._atoms self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 3e6ac345..4115d6a3 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -37,7 +37,7 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): rings = defaultdict(list) # aromatic skeleton for n, m_bond in bonds.items(): for m, bond in m_bond.items(): - if bond.order == 4: + if bond == 4: rings[n].append(m) if not rings: return diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index f9fd582b..ddcd14d7 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -44,8 +44,12 @@ def _enumerate_keto_enol_tautomers(self: Union['MoleculeContainer', 'KetoEnol'], for n, m, b in fix: m_bonds[n][m]._order = b - mol._atoms[a]._implicit_hydrogens += 1 - mol._atoms[d]._implicit_hydrogens -= 1 + a = mol._atoms[a] + d = mol._atoms[d] + a._implicit_hydrogens += 1 + d._implicit_hydrogens -= 1 + a._hybridization -= 1 # -C=X>=C-X or -C=C=X>=C-C=X + d._hybridization += 1 yield mol, ket @cached_property @@ -108,17 +112,16 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): elif n in seen: # aromatic ring destruction. 
pyridine double bonds shift continue elif n in anti: # enol-ketone switch - if current in anti[n]: + if current in anti[n]: # keton or enol bond if hydrogen: - if b == 2: - cp = path.copy() - cp.append((current, n, 1)) - yield cp, True - elif b == 1: cp = path.copy() - cp.append((current, n, 2)) + cp.append((current, n, 1)) # double to single in keton end + yield cp, True + else: + cp = path.copy() + cp.append((current, n, 2)) # single to double in enol end yield cp, False - elif b.order == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route + elif b == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: From d52d062620e58bf9761cd4662504665877e4c665 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 18:47:20 +0100 Subject: [PATCH 30/51] bond assessment streamlined through operator overloading --- chython/algorithms/aromatics/kekule.py | 9 +++--- chython/algorithms/aromatics/thiele.py | 5 ++-- chython/algorithms/depict.py | 7 ++--- chython/algorithms/isomorphism.py | 14 ++++----- chython/algorithms/smiles.py | 12 ++++---- chython/algorithms/standardize/molecule.py | 12 ++++---- chython/algorithms/standardize/resonance.py | 4 +-- chython/algorithms/x3dom.py | 7 ++--- chython/containers/bonds.py | 6 ++-- chython/containers/molecule.py | 33 ++++++++++----------- chython/files/_convert.py | 4 +-- 11 files changed, 52 insertions(+), 61 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 6848638c..13905644 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -113,12 +113,11 @@ def __prepare_rings(self: 'MoleculeContainer'): triple_bonded = set() for n, m_bond in bonds.items(): for m, bond in m_bond.items(): - bo = bond.order - if bo == 4: + if bond == 4: rings[n].append(m) - elif bo == 2: + elif bond == 2: 
double_bonded[n].append(m) - elif bo == 3: + elif bond == 3: triple_bonded.add(n) if not rings: @@ -160,7 +159,7 @@ def __prepare_rings(self: 'MoleculeContainer'): if m not in seen: rings[n].remove(m) rings[m].remove(n) - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 if any(len(ms) not in (2, 3) for ms in rings.values()): raise InvalidAromaticRing('not in ring aromatic bond or hypercondensed rings: ' diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index f236e887..c8034bcb 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -127,8 +127,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: return False # check out-of-ring double bonds - double_bonded = {n for n in rings if any(m not in rings[n] and b.order == 2 - for m, b in bonds[n].items())} + double_bonded = {n for n in rings if any(m not in rings[n] and b == 2 for m, b in bonds[n].items())} # fix_tautomers if fix_tautomers and acceptors and donors: @@ -157,7 +156,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: seen.add(current) new_order = 1 if order == 2 else 2 stack.extend((current, n, depth, new_order) for n in rings[current] if - n not in seen and n not in double_bonded and bonds[current][n].order == order) + n not in seen and n not in double_bonded and bonds[current][n] == order) else: # path not found continue for n, m, o in path: diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index a48eb6c7..73cf2319 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -271,17 +271,16 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): for n, m, bond in self.bonds(): if m in wedge[n]: continue - order = bond.order nx, ny = atoms[n].xy mx, my = atoms[m].xy ny, my = -ny, -my - if order in (1, 4): + if bond in (1, 4): svg.append(f' ') - elif order == 2: + elif bond == 2: dx, dy = _rotate_vector(0, 
double_space, mx - nx, ny - my) svg.append(f' ') svg.append(f' ') - elif order == 3: + elif bond == 3: dx, dy = _rotate_vector(0, triple_space, mx - nx, ny - my) svg.append(f' ') svg.append(f' ') diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index a6ddea3e..30243690 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -245,15 +245,14 @@ def _cython_compiled_structure(self: 'MoleculeContainer'): for j, (m, b) in enumerate(ms.items(), start): indices[j] = x = mapping[m] v = bits1[x] - o = b.order - if o == 1: + if b == 1: v |= 0x0800000000000000 - elif o == 4: - v |= 0x4000000000000000 - elif o == 2: + elif b == 2: v |= 0x1000000000000000 - elif o == 3: + elif b == 3: v |= 0x2000000000000000 + elif b == 4: + v |= 0x4000000000000000 else: v |= 0x8000000000000000 v |= 0x0400000000000000 if b.in_ring else 0x0200000000000000 @@ -488,8 +487,7 @@ def _get_automorphism_mapping(atoms: Dict[int, int], bonds: Dict[int, Dict[int, return # all atoms unique components, closures = _compile_query(atoms, bonds) - mappers = [_get_mapping(order, closures, atoms, bonds, {x for x, *_ in order}) - for order in components] + mappers = [_get_mapping(order, closures, atoms, bonds, {x for x, *_ in order}) for order in components] if len(mappers) == 1: for mapping in mappers[0]: if any(k != v for k, v in mapping.items()): diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index fc0e7d01..8569ff1f 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -452,12 +452,12 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' - order = self._bonds[n][m].order - if order == 4: + bond = self._bonds[n][m] + if bond == 4: if kwargs.get('aromatic', True): return '' return ':' - elif order == 1: # cis-trans /\ + elif bond == 1: # 
cis-trans /\ if kwargs.get('aromatic', True) and self._atoms[n].hybridization == self._atoms[m].hybridization == 4: return '-' if kwargs.get('stereo', True): @@ -469,11 +469,11 @@ def _format_bond(self: Union['MoleculeContainer', 'MoleculeSmiles'], n, m, adjac if (x := ct_map.get((n, m))) is not None: return '/' if x else '\\' return '' - elif order == 2: + elif bond == 2: return '=' - elif order == 3: + elif bond == 3: return '#' - else: # order == 8 + else: # bond == 8 return '~' def __ct_map(self: 'MoleculeContainer', adjacency): diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 049671a2..a69db682 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -235,7 +235,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol continue ch = ch[0][0] ca = [n for n in r if atoms[n].atomic_number == 6 and - (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b.order == 1 for b in bonds[n].values()))] + (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue atoms[ch]._charge = 0 # reset charge for morgan recalculation @@ -268,7 +268,7 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, """ bonds = self._bonds - ab = [(n, m) for n, m, b in self.bonds() if b.order == 8] + ab = [(n, m) for n, m, b in self.bonds() if b == 8] if keep_to_terminal: skeleton = self.not_special_connectivity @@ -303,10 +303,10 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo if len(bonds[n]) > 1: raise ValenceError(f'Hydrogen atom {n} has invalid valence. 
Try to use remove_coordinate_bonds()') for m, b in bonds[n].items(): - if b.order == 1: + if b == 1: if atoms[m].atomic_number != 1: # not H-H explicit[m].append(n) - elif b.order != 8: + elif b != 8: raise ValenceError(f'Hydrogen atom {n} has invalid valence {b.order}.') to_remove = set() @@ -319,7 +319,7 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo explicit_sum = 0 explicit_dict = defaultdict(int) for m, bond in bonds[n].items(): - if m not in hi and bond.order != 8: + if m not in hi and bond != 8: explicit_sum += bond.order explicit_dict[(bond.order, atoms[m].atomic_number)] += 1 try: @@ -454,7 +454,7 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): hs.add(m) if m in bonds[n]: b = bonds[n][m] - if b.order == 8 or b == 8: + if b == 8 or bo == 8: keep_sssr = False b._order = bo else: # new bond diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 31f0a0da..d703083f 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -154,8 +154,8 @@ def __entries(self: 'MoleculeContainer'): (n1, b1), (n2, b2) = bonds[n].items() an1 = atoms[n1] an2 = atoms[n2] - if b1.order == b2.order == 2 and (an1.charge == -1 and an1.atomic_number == 7 or - an2.charge == -1 and an2.atomic_number == 7): + if b1 == b2 == 2 and (an1.charge == -1 and an1.atomic_number == 7 or + an2.charge == -1 and an2.atomic_number == 7): continue elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index 2118899b..9d59160d 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -221,7 +221,6 @@ def __render_bonds(self: 'MoleculeContainer', xyz): doubles = {} half_triple = triple_space / 2 for n, m, bond in self.bonds(): - order = bond.order nx, ny, nz = xyz[n] mx, my, mz = xyz[m] @@ -233,13 +232,13 @@ def 
__render_bonds(self: 'MoleculeContainer', xyz): rotation_angle = acos(nmy / length) lengths[(n, m)] = lengths[(m, n)] = (length, rotation_angle) x, y, z = nx + nmx / 2, ny + nmy / 2, nz + nmz / 2 - if order in (1, 4): + if bond in (1, 4): xml.append(f" \n \n \n" f" \n \n" f" \n \n" " \n \n \n") - elif order == 2: + elif bond == 2: if n in doubles: # normal for plane n m o norm_x, norm_y, norm_z = plane_normal(nmx, nmy, nmz, *doubles[n]) @@ -286,7 +285,7 @@ def __render_bonds(self: 'MoleculeContainer', xyz): f" \n \n" f" \n \n" " \n \n \n") - elif order == 3: + elif bond == 3: nox, noy, noz = vector_normal(nmx, nmy, nmz) # normal for plane n m o diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index a6ce7721..43847d51 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -31,10 +31,10 @@ def __init__(self, order: int): self._stereo = None def __eq__(self, other): - if isinstance(other, Bond): - return self.order == other.order - elif isinstance(other, int): + if isinstance(other, int): return self.order == other + elif isinstance(other, Bond): + return self.order == other.order return False def __repr__(self): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index d56c122d..474490ee 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -186,7 +186,7 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): bond = Bond(bond) super().add_bond(n, m, bond) - if bond.order == 8: + if bond == 8: return # any bond doesn't change anything if self._changed is None: self._changed = {n, m} @@ -208,7 +208,7 @@ def delete_atom(self, n: int, *, _skip_calculation=False): del self._atoms[n] for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] - if bond.order == 8: + if bond == 8: continue if self._changed is None: self._changed = {m} @@ -227,7 +227,7 @@ def delete_bond(self, n: int, m: int, *, _skip_calculation=False): Call `kekule()` and 
`thiele()` in sequence to fix marks. """ del self._bonds[n][m] - if self._bonds[m].pop(n).order != 8: + if self._bonds[m].pop(n) != 8: if self._changed is None: self._changed = {n, m} else: @@ -727,15 +727,14 @@ def calc_labels(self): for m, bond in m_bond.items(): bond._in_ring = anr and (amr := atoms_rings.get(m) or False) and not anr.isdisjoint(amr) # have common rings - order = bond.order - if order == 8: + if bond == 8: continue - elif order == 4: + elif bond == 4: hybridization = 4 elif hybridization != 4: - if order == 3: + if bond == 3: hybridization = 3 - elif order == 2: + elif bond == 2: if hybridization == 1: hybridization = 2 elif hybridization == 2: @@ -769,16 +768,15 @@ def calc_implicit(self, n: int): explicit_dict = defaultdict(int) aroma = 0 for m, bond in self._bonds[n].items(): - order = bond.order - if order == 4: # only neutral carbon aromatic rings supported + if bond == 4: # only neutral carbon aromatic rings supported if not atom.charge and not atom.is_radical and atom.atomic_number == 6: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count atom._implicit_hydrogens = None return - elif order != 8: # any bond used for complexes - explicit_sum += order - explicit_dict[(order, self._atoms[m].atomic_number)] += 1 + elif bond != 8: # any bond used for complexes + explicit_sum += bond.order + explicit_dict[(bond.order, self._atoms[m].atomic_number)] += 1 if aroma == 2: if explicit_sum == 0: # H-Ar @@ -818,12 +816,11 @@ def check_implicit(self, n: int, h: int) -> bool: explicit_dict = defaultdict(int) for m, bond in self._bonds[n].items(): - order = bond.order - if order == 4: # can't check aromatic rings + if bond == 4: # can't check aromatic rings return False - elif order != 8: # any bond used for complexes - explicit_sum += order - explicit_dict[(order, self._atoms[m].atomic_number)] += 1 + elif bond != 8: # any bond used for complexes + explicit_sum += bond.order + explicit_dict[(bond.order, 
self._atoms[m].atomic_number)] += 1 try: rules = atom.valence_rules(explicit_sum) diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 6da1ffd6..422a46a9 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -87,7 +87,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False # rare H0 case if (not keep_radicals and not ignore_aromatic_radicals and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) - and sum(b.order != 8 for b in bonds[n].values()) == 2): + and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._is_radical = True radicalized.append(n) @@ -107,7 +107,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if a.hybridization == 4: if (not keep_radicals and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) - and sum(b.order != 8 for b in bonds[n].values()) == 2): + and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._implicit_hydrogens = 0 a._is_radical = True From 36f6fbdefdd422010eacbf95ab29e2e1c5673783 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 13 Nov 2024 21:25:24 +0100 Subject: [PATCH 31/51] atom matching streamlined through operator overloading. constants added for better readability. 
--- chython/algorithms/aromatics/kekule.py | 32 ++++++++++----- chython/algorithms/aromatics/thiele.py | 29 +++++++++----- chython/algorithms/smiles.py | 13 ++++-- chython/algorithms/standardize/molecule.py | 17 +++++--- chython/algorithms/standardize/resonance.py | 30 +++++++++----- chython/algorithms/standardize/salts.py | 14 ++++--- chython/algorithms/stereo.py | 42 +++++++++++--------- chython/algorithms/tautomers/heteroarenes.py | 13 ++++-- chython/algorithms/tautomers/keto_enol.py | 6 ++- chython/containers/molecule.py | 24 ++++++----- chython/files/_convert.py | 14 +++++-- chython/periodictable/base/element.py | 4 ++ 12 files changed, 155 insertions(+), 83 deletions(-) diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index 13905644..7f3cbd6c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -27,6 +27,18 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +P = 15 +S = 16 +As = 33 +Se = 34 +Te = 52 + + class Kekule: __slots__ = () @@ -170,16 +182,14 @@ def __prepare_rings(self: 'MoleculeContainer'): if any(len(rings[n]) != 2 for n in double_bonded): # double bonded never condensed raise InvalidAromaticRing('quinone valence error') for n in double_bonded: - atom = atoms[n] - if atom.atomic_number == 7: + if (atom := atoms[n]) == N: if atom.charge != 1: raise InvalidAromaticRing('quinone should be charged N atom') - elif atom.atomic_number not in (6, 15, 16, 33, 34, 52) or atom.charge: + elif atom not in (C, P, S, As, Se, Te) or atom.charge: raise InvalidAromaticRing('quinone should be neutral S, Se, Te, C, P, As atom') for n in rings: - atom = atoms[n] - if atom.atomic_number == 6: # carbon + if (atom := atoms[n]) == C: # carbon if atom.charge == 0: if atom.neighbors not in (2, 3): raise InvalidAromaticRing @@ -197,14 +207,14 @@ def __prepare_rings(self: 'MoleculeContainer'): raise InvalidAromaticRing else: raise 
InvalidAromaticRing - elif atom.atomic_number in (7, 15, 33): + elif atom in (N, P, As): if atom.charge == 0: # pyrrole or pyridine. include radical pyrrole if atom.is_radical: if atom.neighbors != 2: # only pyrrole radical raise InvalidAromaticRing double_bonded.add(n) elif atom.neighbors == 3: - if atom.atomic_number == 7: # pyrrole only possible + if atom == N: # pyrrole only possible double_bonded.add(n) else: # P(III) or P(V)H pyrroles.add(n) @@ -215,7 +225,7 @@ def __prepare_rings(self: 'MoleculeContainer'): double_bonded.add(n) elif atom.implicit_hydrogens: # too many hydrogens for aromatic rings raise InvalidAromaticRing - elif atom.neighbors != 4 or atom.atomic_number not in (15, 33): # P(V) in ring [P;a](-R1)-R2 + elif atom.neighbors != 4 or atom not in (P, As): # P(V) in ring [P;a](-R1)-R2 raise InvalidAromaticRing elif atom.charge == -1: # pyrrole only if atom.neighbors != 2 or atom.is_radical: @@ -230,7 +240,7 @@ def __prepare_rings(self: 'MoleculeContainer'): pyrroles.add(n) elif atom.neighbors != 3: # not pyridine oxyde raise InvalidAromaticRing - elif atom.atomic_number == 8: # furan + elif atom == O: # furan if atom.neighbors == 2: if atom.charge == 0: if atom.is_radical: @@ -244,7 +254,7 @@ def __prepare_rings(self: 'MoleculeContainer'): raise InvalidAromaticRing('invalid oxygen charge') else: raise InvalidAromaticRing('Triple-bonded oxygen') - elif atom.atomic_number in (16, 34, 52): # thiophene + elif atom in (S, Se, Te): # thiophene if n not in double_bonded: # not sulphoxyde nor sulphone if atom.neighbors == 2: if atom.is_radical: @@ -267,7 +277,7 @@ def __prepare_rings(self: 'MoleculeContainer'): raise InvalidAromaticRing('S, Se, Te invalid charge ring') else: raise InvalidAromaticRing('S, Se, Te hypervalent ring') - elif atom.atomic_number == 5: # boron + elif atom == B: if atom.charge == 0: if atom.neighbors == 2: if atom.is_radical: # C=1O[B]OC=1 diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py 
index c8034bcb..37ef5704 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -41,6 +41,15 @@ def _freaks(): freak_rules = Proxy(_freaks) +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +P = 15 +S = 16 +Se = 34 + class Thiele: __slots__ = () @@ -68,7 +77,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: if not 3 < lr < 8: # skip 3-membered and big rings continue # only B C N O P S with 2-3 neighbors. detects this: C1=CC=CP12=CC=CC=C2 - if any(atoms[n].atomic_number not in (6, 7, 8, 16, 5, 15) or len(nsc[n]) > 3 for n in ring): + if any(atoms[n] not in (C, N, O, S, B, P) or len(nsc[n]) > 3 for n in ring): continue sp2 = sum(atoms[n].hybridization == 2 for n in ring) if sp2 == lr: # benzene like @@ -76,7 +85,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: tetracycles.append(ring) else: if fix_tautomers and lr % 2: # find potential pyrroles - acceptors.update(n for n in ring if (a := atoms[n]).atomic_number == 7 and not a.charge) + acceptors.update(n for n in ring if (a := atoms[n]) == N and not a.charge) n, *_, m = ring rings[n].add(m) rings[m].add(n) @@ -88,26 +97,24 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: n = next(n for n in ring if atoms[n].hybridization == 1) except StopIteration: # exotic, just skip continue - a = atoms[n] - an = a.atomic_number - if (c := a.charge) == -1: - if an != 6 or lr != 5: # skip any but ferrocene + if (a := atoms[n]).charge == -1: + if a != C or lr != 5: # skip any but ferrocene continue - elif c: # skip any charged + elif a.charge: # skip any charged continue elif lr == 7: # skip electron-rich 7-membered rings - if an != 5: # not B? + if a != 5: # not B? 
continue # below lr == 5 or 6 only - elif an in (8, 16, 34): # O, S, Se + elif a in (O, S, Se): if len(bonds[n]) != 2: # like CS1(C)C=CC=C1 continue - elif an == 7: + elif a == N: if (b := len(bonds[n])) > 3: # extra check for invalid N(IV) continue elif fix_tautomers and lr == 6 and b == 2: donors.append(n) - elif an in (5, 15): # B, P + elif a in (B, P): if len(bonds[n]) > 3: continue else: # only B, [C-], N, O, P, S, Se diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index 8569ff1f..fecbae0d 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -50,6 +50,13 @@ dyn_radical_str = {(True, True): '*', (True, False): '*>^', (False, True): '^>*'} +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 +S = 16 + class Smiles(ABC): __slots__ = () @@ -424,18 +431,18 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): smi[4] = 'H' elif atom.implicit_hydrogens: smi[4] = f'H{atom.implicit_hydrogens}' - elif atom.hybridization == 4 and atom.implicit_hydrogens and atom.atomic_number in (5, 7, 15): # pyrrole + elif atom.hybridization == 4 and atom.implicit_hydrogens and atom in (B, N, P): # pyrrole smi[0] = '[' smi[-1] = ']' if atom.implicit_hydrogens == 1: smi[4] = 'H' else: smi[4] = f'H{atom.implicit_hydrogens}' - elif not atom.implicit_hydrogens and atom.atomic_number in (5, 6, 15, 16) and not self.not_special_connectivity[n]: + elif not atom.implicit_hydrogens and atom in (B, C, P, S) and not self.not_special_connectivity[n]: # elemental B, C, P, S smi[0] = '[' smi[-1] = ']' - elif atom.implicit_hydrogens and atom.atomic_number == 15 and atom.hybridization != 1: + elif atom.implicit_hydrogens and atom == P and atom.hybridization != 1: smi[0] = '[' smi[-1] = ']' if atom.implicit_hydrogens == 1: diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index a69db682..5f4e5e77 100644 --- a/chython/algorithms/standardize/molecule.py +++ 
b/chython/algorithms/standardize/molecule.py @@ -25,13 +25,18 @@ from ._metal_organics import rules as metal_rules from ...containers.bonds import Bond from ...exceptions import ValenceError, ImplementationError -from ...periodictable import H +from ...periodictable import H as _H if TYPE_CHECKING: from chython import MoleculeContainer +# atomic number constants +H = 1 +C = 6 + + class Standardize: __slots__ = () @@ -234,7 +239,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol if len(ch) != 1 or ch[0][1] != -1: continue ch = ch[0][0] - ca = [n for n in r if atoms[n].atomic_number == 6 and + ca = [n for n in r if atoms[n] == C and (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue @@ -272,7 +277,7 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, if keep_to_terminal: skeleton = self.not_special_connectivity - hs = {n for n, a in self._atoms.items() if a.atomic_number == 1 and not skeleton[n]} + hs = {n for n, a in self._atoms.items() if a == H and not skeleton[n]} ab = [(n, m) for n, m in ab if n not in hs and m not in hs] for n, m in ab: @@ -299,12 +304,12 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo explicit = defaultdict(list) for n, atom in atoms.items(): - if atom.atomic_number == 1 and (atom.isotope is None or atom.isotope == 1): + if atom == H and (atom.isotope is None or atom.isotope == 1): if len(bonds[n]) > 1: raise ValenceError(f'Hydrogen atom {n} has invalid valence. 
Try to use remove_coordinate_bonds()') for m, b in bonds[n].items(): if b == 1: - if atoms[m].atomic_number != 1: # not H-H + if atoms[m] != H: # not H-H explicit[m].append(n) elif b != 8: raise ValenceError(f'Hydrogen atom {n} has invalid valence {b.order}.') @@ -374,7 +379,7 @@ def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_ma bonds = self._bonds m = start_map if start_map is not None else max(atoms) + 1 for n in to_add: - atoms[m] = H(implicit_hydrogens=0) + atoms[m] = _H(implicit_hydrogens=0) bonds[n][m] = b = Bond(1) bonds[m] = {n: b} atoms[n]._implicit_hydrogens = 0 diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index d703083f..2283540f 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -24,6 +24,19 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +Si = 14 +P = 15 +S = 16 +As = 33 +Se = 34 +Te = 52 + + class Resonance: __slots__ = () @@ -132,36 +145,35 @@ def __entries(self: 'MoleculeContainer'): nitrogen_ani = set() sulfur_cat = set() for n, a in atoms.items(): - if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52}: + if a not in (B, C, N, O, Si, P, S, As, Se, Te): # filter non-organic set, halogens and aromatics continue elif a.is_radical: rads.add(n) elif a.charge == -1: - if (lb := len(bonds[n])) == 4 and a.atomic_number == 5: # skip boron + if (lb := len(bonds[n])) == 4 and a == B: # skip boron continue - elif lb == 6 and a.atomic_number == 15: # skip [P-]X6 + elif lb == 6 and a == P: # skip [P-]X6 continue if n in errors: # only valid anions accepted continue entries.add(n) elif a.charge == 1: lb = len(bonds[n]) - if a.atomic_number == 7: + if a == N: if lb == 4: # skip ammonia continue elif lb == 2 and a.hybridization == 3: # skip Azide (n1, b1), (n2, b2) = bonds[n].items() an1 = atoms[n1] an2 = atoms[n2] - if b1 == b2 == 2 and (an1.charge == -1 and 
an1.atomic_number == 7 or - an2.charge == -1 and an2.atomic_number == 7): + if b1 == b2 == 2 and (an1.charge == -1 and an1 == N or an2.charge == -1 and an2 == N): continue elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) - elif a.atomic_number == 15 and lb == 4: # skip [P+]R4 + elif a == P and lb == 4: # skip [P+]R4 continue - elif a.atomic_number == 16: + elif a == S: if lb == 2 and a.hybridization == 2: # ad-hoc for X-[S+]=X sulfur_cat.add(n) elif lb == 3 and a.hybridization == 1: # ad-hoc for X-[S+](-X)-X @@ -171,7 +183,7 @@ def __entries(self: 'MoleculeContainer'): if exits or entries: # try to move cation to nitrogen. saturation fixup. for n, a in self._atoms.items(): - if a.atomic_number == 7 and not a.charge: + if a == N and not a.charge: if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) nitrogen_cat.add(n) diff --git a/chython/algorithms/standardize/salts.py b/chython/algorithms/standardize/salts.py index d281b593..48cec856 100644 --- a/chython/algorithms/standardize/salts.py +++ b/chython/algorithms/standardize/salts.py @@ -18,12 +18,18 @@ # from typing import TYPE_CHECKING, List, Tuple, Union from ._salts import acids, rules +from ...periodictable import GroupI, GroupII if TYPE_CHECKING: from chython import MoleculeContainer +# atomic number constants +H = 1 +N = 7 + + class Salts: __slots__ = () @@ -38,7 +44,7 @@ def remove_metals(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Li metals = [] for n, a in atoms.items(): - if a.atomic_number in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: + if not bonds[n] and (a == N or isinstance(a, (GroupI, GroupII)) and a != H): metals.append(n) if 0 < len(metals) < len(self): @@ -84,16 +90,14 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool, List[Tuple[int, int]]]: """ - Split connected 
S-metal/lanthanides/actinides salts to cation/anion pairs. + Split connected S-metal salts to cation/anion pairs. :param logging: return deleted bonds list. """ atoms = self._atoms bonds = self._bonds - metals = [n for n, a in atoms.items() if a.atomic_number in - {3, 4, 11, 12, 19, 20, 37, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 87, 88, - 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102}] + metals = [n for n, a in atoms.items() if isinstance(a, (GroupI, GroupII)) and a != H] if metals: acceptors = set() log = [] diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index 7421d3f5..e243d6a2 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -33,6 +33,10 @@ from chython import MoleculeContainer +# atomic number constants +H = 1 +C = 6 + # 1 2 # \ | # \| @@ -165,7 +169,7 @@ def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: """ tetra = [] for n, atom in self._atoms.items(): - if atom.atomic_number == 6 and not atom.charge and not atom.is_radical: + if atom == C and not atom.charge and not atom.is_radical: env = self._bonds[n] if all(b == 1 for b in env.values()): if sum(int(b) for b in env.values()) > 4: @@ -227,7 +231,7 @@ def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple for n in self.tetrahedrons: if any(not atoms[x].is_forming_single_bonds for x in bonds[n]): continue # skip metal-carbon complexes - env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) + env = tuple(x for x in bonds[n] if atoms[x] != H) if len(env) in (3, 4): tetrahedrons[n] = env return tetrahedrons @@ -255,8 +259,8 @@ def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tu if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 for m, b in nl.items() if m != m1): continue # skip X=C=C structures and metal-carbon complexes - nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b != 8] - mn = [x for x, 
b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b != 8] + nn = [x for x, b in nf.items() if x != n1 and atoms[x] != H and b != 8] + mn = [x for x, b in nl.items() if x != m1 and atoms[x] != H and b != 8] if nn and mn: sn = nn[1] if len(nn) == 2 else None sm = mn[1] if len(mn) == 2 else None @@ -405,7 +409,7 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac t1, t2 = self._stereo_allenes_terminals[c] order = self.stereogenic_allenes[c] - if atoms[m].atomic_number == 1: + if atoms[m] == H: if t1 == n: m1 = order[1] else: @@ -436,7 +440,7 @@ def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cac elif n in self.chiral_tetrahedrons: th = self.stereogenic_tetrahedrons[n] am = atoms[m] - if am.atomic_number == 1: + if am == H: order = [] for x in th: ax = atoms[x] @@ -704,7 +708,7 @@ def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): if len(env) == 4: # hydrogen atom passed to env # hydrogen always last in order try: - order = (*order, next(x for x in env if self._atoms[x].atomic_number == 1)) # see translate scheme + order = (*order, next(x for x in env if self._atoms[x] == H)) # see translate scheme except StopIteration: raise KeyError elif len(env) != 3: # pyramid or tetrahedron expected @@ -744,7 +748,7 @@ def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError @@ -752,23 +756,23 @@ def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn] == H: t0 = 2 
if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn] == H: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError @@ -798,7 +802,7 @@ def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): t0 = 0 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError @@ -806,23 +810,23 @@ def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): t0 = 1 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError - elif nn == n2 or n2 is None and self._atoms[nn].atomic_number == 1: + elif nn == n2 or n2 is None and self._atoms[nn] == H: t0 = 2 if nm == n1: t1 = 1 - elif nm == n3 or n3 is None and self._atoms[nm].atomic_number == 1: + elif nm == n3 or n3 is None and self._atoms[nm] == H: t1 = 3 else: raise KeyError - elif nn == n3 or n3 is None and self._atoms[nn].atomic_number == 1: + elif nn == n3 or n3 is None and self._atoms[nn] == H: t0 = 3 if nm == n0: t1 = 0 - elif nm == n2 or n2 is None and self._atoms[nm].atomic_number == 1: + elif nm == n2 or n2 is None and self._atoms[nm] == H: t1 = 2 else: raise KeyError diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 4115d6a3..99a154f4 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -27,6 +27,13 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 
+ + class HeteroArenes: __slots__ = () @@ -48,7 +55,7 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): for n, ms in rings.items(): a = atoms[n] if len(ms) == 2: - if a.atomic_number in (5, 7, 15): + if a in (B, N, P): if not a.charge and not a.is_radical: # only neutral B, N, P if a.implicit_hydrogens: # pyrrole @@ -57,9 +64,9 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): acceptors.add(n) else: single_bonded.add(n) - elif a.charge == -1 and a.atomic_number == 6: # ferrocene + elif a.charge == -1 and a == C: # ferrocene single_bonded.add(n) - elif len(ms) == 3 and a.atomic_number in (5, 7, 15) and not a.charge and not a.is_radical: + elif len(ms) == 3 and a in (B, N, P) and not a.charge and not a.is_radical: single_bonded.add(n) if not donors or not acceptors: return diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index ddcd14d7..ba80f63b 100644 --- a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -27,6 +27,10 @@ from chython import MoleculeContainer +# atomic number constants +C = 6 + + class KetoEnol: __slots__ = () @@ -121,7 +125,7 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): cp = path.copy() cp.append((current, n, 2)) # single to double in enol end yield cp, False - elif b == bond and (a := atoms[n]).atomic_number == 6: # classic keto-enol route + elif b == bond and (a := atoms[n]) == C: # classic keto-enol route if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 474490ee..e6263811 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -40,7 +40,12 @@ from ..algorithms.tautomers import Tautomers from ..algorithms.x3dom import X3domMolecule from ..exceptions import ValenceError -from ..periodictable import DynamicElement, Element, QueryElement, H +from 
..periodictable import DynamicElement, Element, QueryElement, H as _H + + +# atomic number constants +H = 1 +C = 6 class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], Morgan, Rings, MoleculeIsomorphism, @@ -134,7 +139,7 @@ def is_radical(self) -> bool: @cached_property def molecular_mass(self) -> float: - h = H().atomic_mass + h = _H().atomic_mass return sum(a.atomic_mass + a.implicit_hydrogens * h for a in self._atoms.values()) @cached_property @@ -291,7 +296,7 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul if as_query: sub = object.__new__(QueryContainer) - lost = {n for n, a in self._atoms.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure + lost = {n for n, a in self._atoms.items() if a != H} - set(atoms) # atoms not in substructure # atoms with fully present neighbors not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} @@ -741,10 +746,9 @@ def calc_labels(self): hybridization = 3 neighbors += 1 - an = atoms[m].atomic_number - if an == 1: + if (a := atoms[m]) == H: explicit_hydrogens += 1 - elif an != 6: + elif a != C: heteroatoms += 1 atom = atoms[n] atom._neighbors = neighbors @@ -759,8 +763,7 @@ def calc_implicit(self, n: int): """ Set firs possible hydrogens count based on rules """ - atom = self._atoms[n] - if atom.atomic_number == 1: # hydrogen nether has implicit H + if (atom := self._atoms[n]) == H: # hydrogen nether has implicit H atom._implicit_hydrogens = 0 return @@ -769,7 +772,7 @@ def calc_implicit(self, n: int): aroma = 0 for m, bond in self._bonds[n].items(): if bond == 4: # only neutral carbon aromatic rings supported - if not atom.charge and not atom.is_radical and atom.atomic_number == 6: + if not atom.charge and not atom.is_radical and atom == C: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count atom._implicit_hydrogens = None @@ -808,8 +811,7 @@ def calc_implicit(self, n: int): atom._implicit_hydrogens = None # rule not found def 
check_implicit(self, n: int, h: int) -> bool: - atom = self._atoms[n] - if atom.atomic_number == 1: # hydrogen nether has implicit H + if (atom := self._atoms[n]) == H: # hydrogen nether has implicit H return h == 0 explicit_sum = 0 diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 422a46a9..c23ee616 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -22,6 +22,13 @@ from ..periodictable import Element +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 + + def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False, keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, ignore_carbon_radicals=False, _cls=MoleculeContainer): @@ -86,7 +93,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False a._implicit_hydrogens = h # rare H0 case if (not keep_radicals and not ignore_aromatic_radicals - and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and not h and not a.charge and not a.is_radical and a in (B, C, N, P) and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._is_radical = True @@ -106,7 +113,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif h != a.implicit_hydrogens: # H count mismatch. 
if a.hybridization == 4: if (not keep_radicals - and not h and not a.charge and not a.is_radical and a.atomic_number in (5, 6, 7, 15) + and not h and not a.charge and not a.is_radical and a in (B, C, N, P) and sum(b != 8 for b in bonds[n].values()) == 2): # c[c]c - aromatic B,C,N,P radical a._implicit_hydrogens = 0 @@ -139,8 +146,7 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if ignore_carbon_radicals: for n in radicalized: - a = atoms[n] - if a.atomic_number == 6: + if (a := atoms[n]) == C: a._is_radical = False a._implicit_hydrogens += 1 data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') diff --git a/chython/periodictable/base/element.py b/chython/periodictable/base/element.py index 7818af9a..1185d661 100644 --- a/chython/periodictable/base/element.py +++ b/chython/periodictable/base/element.py @@ -334,6 +334,10 @@ def __eq__(self, other): """ compare attached to molecules elements """ + if isinstance(other, int): + return self.atomic_number == other + elif isinstance(other, str): + return self.atomic_symbol == other return isinstance(other, Element) and self.atomic_number == other.atomic_number and \ self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical From 80d21299d0582be126b34e923336ed42a3031fac Mon Sep 17 00:00:00 2001 From: stsouko Date: Thu, 14 Nov 2024 09:09:31 +0100 Subject: [PATCH 32/51] saved --- chython/algorithms/aromatics/_rules.py | 18 ++++++- chython/algorithms/aromatics/thiele.py | 17 +------ chython/algorithms/isomorphism.py | 67 ++++++++++++++++++++++++-- chython/files/_mdl/stereo.py | 2 +- chython/files/libinchi/wrapper.py | 8 +-- 5 files changed, 84 insertions(+), 28 deletions(-) diff --git a/chython/algorithms/aromatics/_rules.py b/chython/algorithms/aromatics/_rules.py index 02b061aa..49b69cd6 100644 --- a/chython/algorithms/aromatics/_rules.py +++ b/chython/algorithms/aromatics/_rules.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# 
Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -104,7 +104,21 @@ def _rules(): return rules +def _freaks(): + from ... import smarts + + rules = [] + + q = smarts('[N,O,S;D2;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') + rules.append(q) + + q = smarts('[N;D3;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') + rules.append(q) + return rules + + rules = Proxy(_rules) +freak_rules = Proxy(_freaks) -__all__ = ['rules'] +__all__ = ['rules', 'freak_rules'] diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 37ef5704..c6682247 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -17,8 +17,8 @@ # along with this program; if not, see . # from collections import defaultdict -from lazy_object_proxy import Proxy from typing import TYPE_CHECKING +from ._rules import freak_rules from ..rings import _sssr, _connected_components @@ -26,21 +26,6 @@ from chython import MoleculeContainer -def _freaks(): - from ... import smarts - - rules = [] - - q = smarts('[N,O,S;D2;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') - rules.append(q) - - q = smarts('[N;D3;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') - rules.append(q) - return rules - - -freak_rules = Proxy(_freaks) - # atomic number constants B = 5 C = 6 diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 30243690..2a64bdf8 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -156,9 +156,9 @@ def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: boo :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. 
""" - if isinstance(other, MoleculeIsomorphism): - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) - raise TypeError('MoleculeContainer expected') + if not isinstance(other, MoleculeIsomorphism): + raise TypeError('MoleculeContainer expected') + return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) @cached_property def _cython_compiled_structure(self: 'MoleculeContainer'): @@ -299,6 +299,67 @@ def get_mapping(query, scope): return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, components=components, get_mapping=get_mapping) + atoms_stereo = self._atoms_stereo + allenes_stereo = self._allenes_stereo + cis_trans_stereo = self._cis_trans_stereo + + other_atoms_stereo = other._atoms_stereo + other_allenes_stereo = other._allenes_stereo + other_cis_trans_stereo = other._cis_trans_stereo + other_translate_tetrahedron_sign = other._translate_tetrahedron_sign + other_translate_allene_sign = other._translate_allene_sign + other_translate_cis_trans_sign = other._translate_cis_trans_sign + + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans + allenes = self.stereogenic_allenes + + oatoms = other._atoms + + for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, + searching_scope=searching_scope): + for n, a in self._atoms.items(): + if a.stereo is None: + continue + m = mapping[n] + oa = oatoms[m] + if oa.stereo is None: # stereo in query should match only stereo atom + break + other._translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) + for n, s in atoms_stereo.items(): + m = mapping[n] + if m not in other_atoms_stereo: # self stereo atom not stereo in other + break + # translate stereo mark in other in order of self tetrahedron + if other_translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) != s: + break + else: + for n, s 
in allenes_stereo.items(): + m = mapping[n] + if m not in other_allenes_stereo: # self stereo allene not stereo in other + break + # translate stereo mark in other in order of self allene + nn, nm, *_ = allenes[n] + if other_translate_allene_sign(m, mapping[nn], mapping[nm]) != s: + break + else: + for nm, s in cis_trans_stereo.items(): + n, m = nm + on, om = mapping[n], mapping[m] + if (on, om) not in other_cis_trans_stereo: + if (om, on) not in other_cis_trans_stereo: + break # self stereo cis_trans not stereo in other + else: + nn, nm, *_ = cis_trans[nm] + if other_translate_cis_trans_sign(om, on, mapping[nm], mapping[nn]) != s: + break + else: + nn, nm, *_ = cis_trans[nm] + if other_translate_cis_trans_sign(on, om, mapping[nn], mapping[nm]) != s: + break + else: + yield mapping + @cached_property def _cython_compiled_query(self): # long I: diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py index ce9a651c..761b6503 100644 --- a/chython/files/_mdl/stereo.py +++ b/chython/files/_mdl/stereo.py @@ -30,7 +30,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans= log = [] if calc_cis_trans: - molecule.calculate_cis_trans_from_2d() + molecule.calculate_cis_trans_from_2d(clean_cache=False) stereo = [(mapping[n], mapping[m], s) for n, m, s in data['stereo']] while stereo: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index aaefb948..3941428e 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -53,7 +53,7 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, + 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, 'p': 
atom.implicit_protium, 'd': atom.implicit_deuterium, 't': atom.implicit_tritium}) for k in range(atom.num_bonds): @@ -92,16 +92,12 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo def postprocess_molecule(molecule, data, *, ignore_stereo=False): atoms = molecule._atoms bonds = molecule._bonds - charges = molecule._charges - radicals = molecule._radicals - hydrogens = molecule._hydrogens - plane = molecule._plane # set hydrogen atoms. INCHI designed for hydrogens handling. hope correctly. free = count(len(atoms) + 1) for n, atom in enumerate(data['atoms'], 1): if atom['element'] != 'H': - hydrogens[n] = atom['hydrogens'] + atoms[n]._implicit_hydrogens = atom['hydrogens'] # in chython hydrogens never have implicit H. elif atom['hydrogens']: # >[xH]-H case m = next(free) From bf8132739934a2586cfc4eed74a458de0b0a2881 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Wed, 20 Nov 2024 18:04:40 +0100 Subject: [PATCH 33/51] parsers refactored --- chython/files/MRVrw.py | 45 ++++++++----------- chython/files/PDBrw.py | 9 +++- chython/files/_convert.py | 40 +++++++++++------ chython/files/_mapping.py | 6 +++ chython/files/_mdl/emol.py | 10 ++--- chython/files/_mdl/erxn.py | 4 +- chython/files/_mdl/mol.py | 10 ++--- chython/files/_mdl/rxn.py | 4 +- chython/files/_mdl/stereo.py | 13 +++--- chython/files/_mdl/write.py | 36 ++++++--------- chython/files/daylight/parser.py | 3 +- chython/files/daylight/smiles.py | 9 ++-- chython/files/libinchi/wrapper.py | 73 ++++++++++++++++--------------- 13 files changed, 136 insertions(+), 126 deletions(-) diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index 0a589410..3c808746 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -140,12 +140,12 @@ def read_structure(self, *, current: bool = True): mol = create_molecule(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _cls=self.molecule_cls) if not self.__ignore_stereo: postprocess_molecule(mol, tmp, 
calc_cis_trans=self.__calc_cis_trans) - mol.meta.update(meta) + if meta: + mol.meta.update(meta) return mol elif 'reaction' in data and isinstance(data['reaction'], dict): data = data['reaction'] - tmp = {'reactants': [], 'products': [], 'reagents': [], - 'meta': None, 'log': log, 'title': data.get('@title')} + tmp = {'reactants': [], 'products': [], 'reagents': [], 'log': log, 'title': data.get('@title')} n = 0 for tag, group in (('reactantList', 'reactants'), ('productList', 'products'), ('agentList', 'reagents')): @@ -174,7 +174,8 @@ def read_structure(self, *, current: bool = True): if not self.__ignore_stereo: for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) - rxn.meta.update(meta) + if meta: + rxn.meta.update(meta) return rxn else: raise ValueError('reaction or molecule expected') @@ -263,7 +264,6 @@ def _read_block(self, *, current: bool = True) -> dict: def parse_molecule(data): atoms, bonds, stereo = [], [], [] log = [] - hydrogens = {} atom_map = {} if 'atom' in data['atomArray']: da = data['atomArray']['atom'] @@ -275,20 +275,20 @@ def parse_molecule(data): 'isotope': int(atom['@isotope']) if '@isotope' in atom else None, 'charge': int(atom.get('@formalCharge', 0)), 'is_radical': '@radical' in atom, - 'mapping': int(atom.get('@mrvMap', 0))}) + 'parsed_mapping': int(atom.get('@mrvMap', 0))}) if '@z3' in atom: atoms[-1].update(x=float(atom['@x3']), y=float(atom['@y3']), z=float(atom['@z3'])) else: - atoms[-1].update(x=float(atom['@x2']) / 2, y=float(atom['@y2']) / 2, z=0.) 
+ atoms[-1].update(x=float(atom['@x2']) / 2, y=float(atom['@y2']) / 2) if '@mrvQueryProps' in atom: raise ValueError('queries unsupported') if '@hydrogenCount' in atom: - hydrogens[n] = int(atom['@hydrogenCount']) + atoms[-1]['implicit_hydrogens'] = int(atom['@hydrogenCount']) else: atom = data['atomArray'] for n, (_id, e) in enumerate(zip(atom['@atomID'].split(), atom['@elementType'].split())): atom_map[_id] = n - atoms.append({'element': e, 'charge': 0, 'mapping': 0, 'isotope': None, 'is_radical': False}) + atoms.append({'element': e}) if '@z3' in atom: for a, x, y, z in zip(atoms, atom['@x3'].split(), atom['@y3'].split(), atom['@z3'].split()): a['x'] = float(x) @@ -298,7 +298,6 @@ def parse_molecule(data): for a, x, y in zip(atoms, atom['@x2'].split(), atom['@y2'].split()): a['x'] = float(x) / 2 a['y'] = float(y) / 2 - a['z'] = 0. if '@isotope' in atom: for a, x in zip(atoms, atom['@isotope'].split()): if x != '0': @@ -310,7 +309,7 @@ def parse_molecule(data): if '@mrvMap' in atom: for a, x in zip(atoms, atom['@mrvMap'].split()): if x != '0': - a['mapping'] = int(x) + a['parsed_mapping'] = int(x) if '@radical' in atom: for a, x in zip(atoms, atom['@radical'].split()): if x != '0': @@ -340,8 +339,8 @@ def parse_molecule(data): log.append('incorrect bondStereo tag') bonds.append((atom_map[a1], atom_map[a2], order)) - return {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': None, 'title': data.get('@title'), 'log': log, 'atom_map': atom_map} + return {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, + 'title': data.get('@title'), 'log': log, 'atom_map': atom_map} def parse_sgroup(data, molecule): @@ -486,30 +485,24 @@ def __write(self, data): file.write('\n') def __write_molecule(self, g): - gp = g._plane - gc = g._charges - gr = g._radicals bg = g._bonds - hg = g._hydrogens - hb = g.hybridization mapping = self.__mapping file = self.__file file.write('') - for n, atom in g._atoms.items(): - x, y = gp[n] - ih = hg[n] + for n, 
atom in g.atoms(): + x, y = atom.x, atom.y file.write(f'') file.write('') diff --git a/chython/files/PDBrw.py b/chython/files/PDBrw.py index a761e3cb..01ad869f 100644 --- a/chython/files/PDBrw.py +++ b/chython/files/PDBrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -178,6 +178,8 @@ def read_structure(self, *, current: bool = True) -> MoleculeContainer: atom_charge=charges, _cls=self.molecule_cls) mol.meta['RESIDUE'] = dict(enumerate(res, 1)) + if log: + mol.meta['chython_parsing_log'] = log if self.__parse_as_single: self.__parsed_first = mol.copy() return mol @@ -191,6 +193,11 @@ def read_structure(self, *, current: bool = True) -> MoleculeContainer: c[n] = (x, y, z) mol = self.__parsed_first.copy() mol._conformers[0] = c + if log: + if 'chython_parsing_log' in mol.meta: + mol.meta['chython_parsing_log'] = mol.meta['chython_parsing_log'] + log + else: + mol.meta['chython_parsing_log'] = log return mol def close(self, force: bool = False): diff --git a/chython/files/_convert.py b/chython/files/_convert.py index c23ee616..e25a93cd 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -33,12 +33,18 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, ignore_carbon_radicals=False, _cls=MoleculeContainer): g = _cls() + g._name = data.get('title') atoms = g._atoms bonds = g._bonds mapping = data['mapping'] - for n, atom in enumerate(data['atoms']): - n = mapping[n] + + if any(a.get('z') for a in data['atoms']): + # store conformer + g._conformers = [{n: (a['x'], a['y'], a['z']) for n, a in zip(mapping, data['atoms'])}] + + for n, atom in zip(mapping, data['atoms']): e = Element.from_symbol(atom.pop('element')) + atom.pop('z', None) # clean up MDL try: atoms[n] 
= e(**atom) except (ValueError, TypeError): @@ -60,15 +66,11 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False g.calc_labels() # set all labels except rings - if any(a.get('z') for a in data['atoms']): - # store conformer - g._conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] - - if data['log']: # store log to the meta - if data['meta'] is None: + if data.get('log'): # store log to the meta + if data.get('meta') is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] - g._meta = data['meta'] + g._meta = data.get('meta') or None if skip_calc_implicit: # don't calc Hs. e.g. INCHI return g @@ -107,6 +109,8 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif ignore: # radical state also has errors. a._is_radical = False # reset radical state implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') @@ -121,6 +125,8 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False radicalized.append(n) elif ignore: implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') @@ -135,11 +141,15 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False elif ignore: a._is_radical = False # reset radical state implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') elif ignore: # just ignore it 
implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') else: raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') @@ -149,10 +159,12 @@ def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False if (a := atoms[n]) == C: a._is_radical = False a._implicit_hydrogens += 1 + if data.get('log') is None: + data['log'] = [] data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') elif radicalized: g.meta['chython_radicalized_atoms'] = radicalized - if data['log'] and 'chython_parsing_log' not in g.meta: + if data.get('log') and 'chython_parsing_log' not in g.meta: g.meta['chython_parsing_log'] = data['log'] if implicit_mismatch: g.meta['chython_implicit_mismatch'] = implicit_mismatch @@ -177,17 +189,19 @@ def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_i except ValueError as e: if not ignore: raise + if data.get('log') is None: + data['log'] = [] data['log'].append(f'ignored {gr} molecule {n} with {e}') tdl.append(n) if tdl: # ad-hoc for later postprocessing for n in reversed(tdl): del pms[n] - if data['log']: # store log to the meta - if data['meta'] is None: + if data.get('log'): # store log to the meta + if data.get('meta') is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] - return _r_cls(rc, pr, rg, meta=data['meta'], name=data['title']) + return _r_cls(rc, pr, rg, meta=data.get('meta') or None, name=data.get('title')) __all__ = ['create_molecule'] diff --git a/chython/files/_mapping.py b/chython/files/_mapping.py index 331eaa3e..c142676e 100644 --- a/chython/files/_mapping.py +++ b/chython/files/_mapping.py @@ -34,6 +34,8 @@ def postprocess_parsed_molecule(data, *, remap=False, ignore=True): if not ignore: raise MappingError('mapping in molecules should be unique') remapped.append(next(length)) + if data.get('log') is None: + 
data['log'] = [] data['log'].append(f'mapping in molecule changed from {m} to {remapped[n]}') else: remapped.append(m) @@ -72,6 +74,8 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): raise MappingError('mapping in reagents or products or reactants should be unique') # force remap non unique atoms in molecules. _remap.append(next(length)) + if data.get('log') is None: + data['log'] = [] data['log'].append(f'mapping in {i} changed from {m} to {_remap[-1]}') else: _remap.append(m) @@ -83,6 +87,8 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): e = f'reagents has map intersection with reactants or products: {tmp}' if not ignore: raise MappingError(e) + if data.get('log') is None: + data['log'] = [] data['log'].append(e) maps['reagents'] = [x if x not in tmp else next(length) for x in maps['reagents']] diff --git a/chython/files/_mdl/emol.py b/chython/files/_mdl/emol.py index 9e6b4437..03b15a6a 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/_mdl/emol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -36,7 +36,6 @@ def parse_mol_v3000(data, *, _header=True): atoms = [] bonds = [] stereo = [] - hydrogens = {} meta = {} atom_map = {} star_points = [] @@ -95,7 +94,7 @@ def parse_mol_v3000(data, *, _header=True): atom_map[n] = len(atoms) atoms.append({'element': a, 'isotope': i, 'charge': c, 'is_radical': r, - 'x': float(x), 'y': float(y), 'z': float(z), 'mapping': int(m)}) + 'x': float(x), 'y': float(y), 'z': float(z), 'parsed_mapping': int(m)}) for line in data[2 + atom_count: 2 + atom_count + bonds_count]: _, t, a1, a2, *kvs = split(line) @@ -172,14 +171,13 @@ def parse_mol_v3000(data, *, _header=True): d = v.strip('"') if a and f and d: if f == 'MRV_IMPLICIT_H': - hydrogens[a[0]] = int(d[6:]) + atoms[a[0]]['implicit_hydrogens'] = int(d[6:]) else: log.append(f'ignored SGROUP DAT {i}: {a}\t{f}\t{d}') elif _type.startswith('SRU'): raise ValueError('Polymers not supported') - return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': meta or None, 'log': log} + return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'meta': meta, 'log': log} def split(line): # todo optimize diff --git a/chython/files/_mdl/erxn.py b/chython/files/_mdl/erxn.py index 25354f9b..6b707b3e 100644 --- a/chython/files/_mdl/erxn.py +++ b/chython/files/_mdl/erxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -61,7 +61,7 @@ def parse_rxn_v3000(data, *, ignore=True): reagents_count -= 1 return {'reactants': molecules[:reactants_count], 'products': molecules[reactants_count:products_count], - 'reagents': molecules[products_count:], 'title': title, 'meta': None, 'log': log} + 'reagents': molecules[products_count:], 'title': title, 'log': log} __all__ = ['parse_rxn_v3000'] diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index 3e15cbf9..db819f2b 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -36,7 +36,6 @@ def parse_mol_v2000(data): atoms = [] bonds = [] stereo = [] - hydrogens = {} dat = {} for line in data[4: 4 + atoms_count]: @@ -62,8 +61,8 @@ def parse_mol_v2000(data): isotope = None mapping = line[60:63] - atoms.append({'element': element, 'charge': charge, 'isotope': isotope, 'is_radical': False, - 'mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), + atoms.append({'element': element, 'charge': charge, 'isotope': isotope, + 'parsed_mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), 'z': float(line[20:30]), 'delta_isotope': delta_isotope}) for line in data[4 + atoms_count: 4 + atoms_count + bonds_count]: @@ -133,14 +132,13 @@ def parse_mol_v2000(data): value = x['value'] if len(_atoms) != 1 or _atoms[0] == -1 or not value: raise InvalidV2000(f'MRV_IMPLICIT_H spec invalid {x}') - hydrogens[_atoms[0]] = int(value[6:]) + atoms[_atoms[0]]['implicit_hydrogens'] = int(value[6:]) else: log.append(f'ignored data: {x}') except KeyError: raise InvalidV2000(f'Invalid SGROUP {x}') - return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': None, 'log': log} + return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'log': log} __all__ = ['parse_mol_v2000'] diff --git a/chython/files/_mdl/rxn.py b/chython/files/_mdl/rxn.py 
index d81ee459..50df40e4 100644 --- a/chython/files/_mdl/rxn.py +++ b/chython/files/_mdl/rxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -61,7 +61,7 @@ def parse_rxn_v2000(data, *, ignore=True): reagents_count -= 1 return {'reactants': molecules[:reactants_count], 'products': molecules[reactants_count:products_count], - 'reagents': molecules[products_count:], 'title': title, 'meta': None, 'log': log} + 'reagents': molecules[products_count:], 'title': title, 'log': log} __all__ = ['parse_rxn_v2000'] diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py index 761b6503..212cb77d 100644 --- a/chython/files/_mdl/stereo.py +++ b/chython/files/_mdl/stereo.py @@ -23,11 +23,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans= if ignore_stereo: return mapping = data['mapping'] - - if 'chython_parsing_log' in molecule.meta: - log = molecule.meta['chython_parsing_log'] - else: - log = [] + log = [] if calc_cis_trans: molecule.calculate_cis_trans_from_2d(clean_cache=False) @@ -57,8 +53,11 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans= continue break - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log + if log: + if 'chython_parsing_log' not in molecule.meta: + molecule.meta['chython_parsing_log'] = log + else: + molecule.meta['chython_parsing_log'].extend(log) __all__ = ['postprocess_molecule'] diff --git a/chython/files/_mdl/write.py b/chython/files/_mdl/write.py index c6bfc1bd..3319c60d 100644 --- a/chython/files/_mdl/write.py +++ b/chython/files/_mdl/write.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -77,10 +77,7 @@ def _write_molecule(self, g, write3d=None): else: z = 0 - gc = g._charges - gr = g._radicals - gp = g._plane - gb = g._bonds + bonds = g._bonds file = self._file file.write(f'M V30 BEGIN CTAB\nM V30 COUNTS {g.atoms_count} {g.bonds_count} 0 0 0\nM V30 BEGIN ATOM\n') @@ -90,11 +87,10 @@ def _write_molecule(self, g, write3d=None): x, y, z = xyz[m] z = f'{z:.4f}' else: - x, y = gp[m] + x, y = a.x, a.y - c = gc[m] - c = f' CHG={c}' if c else '' - r = ' RAD=2' if gr[m] else '' + c = f' CHG={a.charge}' if a.charge else '' + r = ' RAD=2' if a.is_radical else '' i = f' MASS={a.isotope}' if a.isotope else '' if not self._mapping: @@ -107,7 +103,7 @@ def _write_molecule(self, g, write3d=None): wedge = defaultdict(set) i = 0 # trick for empty wedge_map for i, (n, m, s) in enumerate(g._wedge_map, start=1): - file.write(f'M V30 {i} {gb[n][m].order} {mapping[n]} {mapping[m]} CFG={s == 1 and "1" or "3"}\n') + file.write(f'M V30 {i} {bonds[n][m].order} {mapping[n]} {mapping[m]} CFG={s == 1 and "1" or "3"}\n') wedge[n].add(m) wedge[m].add(n) @@ -130,10 +126,7 @@ def _write_molecule(self, g, write3d=None): else: z = 0. 
- gc = g._charges - gr = g._radicals - gp = g._plane - gb = g._bonds + bonds = g._bonds file = self._file file.write(f'{g.name}\n\n\n{g.atoms_count:3d}{g.bonds_count:3d} 0 0 0 0 999 V2000\n') @@ -142,9 +135,9 @@ def _write_molecule(self, g, write3d=None): if write3d is not None: x, y, z = xyz[m] else: - x, y = gp[m] + x, y = a.x, a.y - c = charge_map[gc[m]] + c = charge_map[a.charge] if not self._mapping: m = 0 file.write(f'{x:10.4f}{y:10.4f}{z:10.4f} {a.atomic_symbol:3s} 0{c} 0 0 0 0 0 0 0{m:3d} 0 0\n') @@ -152,21 +145,20 @@ def _write_molecule(self, g, write3d=None): atoms = {m: n for n, m in enumerate(g._atoms, start=1)} wedge = defaultdict(set) for n, m, s in g._wedge_map: - file.write(f'{atoms[n]:3d}{atoms[m]:3d} {gb[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') + file.write(f'{atoms[n]:3d}{atoms[m]:3d} {bonds[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') wedge[n].add(m) wedge[m].add(n) for n, m, b in g.bonds(): if m not in wedge[n]: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {b.order} 0 0 0 0\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, a in enumerate(g._atoms.values(), start=1): if a.isotope: file.write(f'M ISO 1 {n:3d} {a.isotope:3d}\n') - if gr[m]: + if a.is_radical: file.write(f'M RAD 1 {n:3d} 2\n') # invalid for carbenes - c = gc[m] - if c in (-4, 4): - file.write(f'M CHG 1 {n:3d} {c:3d}\n') + if a.charge in (-4, 4): + file.write(f'M CHG 1 {n:3d} {a.charge:3d}\n') file.write('M END\n') diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index 3cab6272..f685a359 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -147,8 +147,7 @@ def parser(tokens, strong_cycle): elif previous: raise IncorrectSmiles('bond on the end') - return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log, - 'title': None, 'meta': None} + return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log} __all__ = ['parser'] 
diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 410df35a..442195f8 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -78,7 +78,7 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: contract = None if '>' in smi: - record = {'reactants': [], 'reagents': [], 'products': [], 'log': log, 'meta': None, 'title': None} + record = {'reactants': [], 'reagents': [], 'products': [], 'log': log} try: reactants, reagents, products = smi.split('>') except ValueError as e: @@ -237,8 +237,11 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): continue break - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log + if log: + if 'chython_parsing_log' not in molecule.meta: + molecule.meta['chython_parsing_log'] = log + else: + molecule.meta['chython_parsing_log'].extend(log) __all__ = ['smiles'] diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 3941428e..215a2ba7 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -24,7 +24,7 @@ from ...containers import MoleculeContainer from ...containers.bonds import Bond from ...exceptions import ValenceError, IsChiral, NotChiral -from ...periodictable import H +from ...periodictable import H as _H try: @@ -33,6 +33,9 @@ from importlib_resources import files, as_file +H = 1 + + def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> MoleculeContainer: """ INCHI string parser @@ -46,15 +49,23 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo raise ValueError('invalid INCHI') atoms, bonds = [], [] + protium = {} + deuterium = {} + tritium = {} seen = set() for n in range(structure.num_atoms): seen.add(n) atom = structure.atom[n] - atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, + 
atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope, - 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, 't': atom.implicit_tritium}) + 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope}) + if atom.implicit_protium: + protium[n] = atom.implicit_protium + if atom.implicit_deuterium: + deuterium[n] = atom.implicit_deuterium + if atom.implicit_tritium: + tritium[n] = atom.implicit_tritium for k in range(atom.num_bonds): m = atom.neighbor[k] @@ -82,8 +93,9 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo lib.FreeStructFromINCHI(byref(structure)) - tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': stereo_atoms, 'stereo_allenes': stereo_allenes, 'log': [], - 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), 'title': None, 'meta': None} + tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': stereo_atoms, 'stereo_allenes': stereo_allenes, + 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), + 'protium': protium, 'deuterium': deuterium, 'tritium': tritium} mol = create_molecule(tmp, skip_calc_implicit=True, _cls=_cls) postprocess_molecule(mol, tmp, ignore_stereo=ignore_stereo) return mol @@ -95,38 +107,27 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): # set hydrogen atoms. INCHI designed for hydrogens handling. hope correctly. free = count(len(atoms) + 1) - for n, atom in enumerate(data['atoms'], 1): - if atom['element'] != 'H': - atoms[n]._implicit_hydrogens = atom['hydrogens'] - # in chython hydrogens never have implicit H. - elif atom['hydrogens']: # >[xH]-H case - m = next(free) - charges[m] = 0 - radicals[m] = False - plane[m] = (0., 0.) 
- hydrogens[n] = 0 - hydrogens[m] = 0 - atoms[m] = a = H() - a._attach_graph(molecule, m) + to_add = [] + for n, atom in atoms.items(): + # in chython hydrogens never have implicit H. convert to explicit + if atom == H and atom.implicit_hydrogens: + for _ in range(atom.implicit_hydrogens): + to_add.append((n, next(free), _H(implicit_hydrogens=0))) + atom._implicit_hydrogens = 0 + + for n, p in data['protium'].items(): + to_add.append((n + 1, next(free), _H(isotope=1, implicit_hydrogens=0))) + for n, p in data['deuterium'].items(): + to_add.append((n + 1, next(free), _H(isotope=2, implicit_hydrogens=0))) + for n, p in data['tritium'].items(): + to_add.append((n + 1, next(free), _H(isotope=3, implicit_hydrogens=0))) + + if to_add: + for n, m, a in to_add: + atoms[m] = a bonds[n][m] = b = Bond(1) bonds[m] = {n: b} - b._attach_graph(molecule, n, m) - else: # H+, H* or >H-[xH] cases - hydrogens[n] = 0 - # convert isotopic implicit hydrogens to explicit - for i, k in enumerate(('p', 'd', 't'), 1): - if atom[k]: - for _ in range(atom[k]): - m = next(free) - charges[m] = 0 - radicals[m] = False - plane[m] = (0., 0.) 
- hydrogens[m] = 0 - atoms[m] = a = H(i) - a._attach_graph(molecule, m) - bonds[n][m] = b = Bond(1) - bonds[m] = {n: b} - b._attach_graph(molecule, n, m) + molecule.calc_labels() # reset labels if ignore_stereo or not data['stereo_atoms'] and not data['stereo_cumulenes'] and not data['stereo_allenes']: return From 90176ac8c1404b6cb76fdcb36bcc1532f51045e5 Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 20 Nov 2024 19:51:49 +0100 Subject: [PATCH 34/51] fixes --- chython/containers/reaction.py | 6 +++--- chython/files/MRVrw.py | 1 - chython/files/_mdl/emol.py | 6 +++--- chython/files/_mdl/mol.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index bbb6509f..e34185e8 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -277,7 +277,7 @@ def __format__(self, format_spec): sig = [] count = 0 contract = [] - orders = [] + radicals = [] for ml in (self.__reactants, self.__reagents, self.__products): mso = [(m, *m.__format__(format_spec, _return_order=True)) for m in ml] @@ -292,13 +292,13 @@ def __format__(self, format_spec): else: count += 1 - orders.append((m, o)) + radicals.extend(m.atom(n).is_radical for n in o) ss.append(s) sig.append('.'.join(ss)) if not format_spec or '!x' not in format_spec: cx = [] - if r := ','.join(str(n) for n, (m, a) in enumerate((m, a) for m, o in orders for a in o) if m._radicals[a]): + if r := ','.join(str(n) for n, r in enumerate(radicals) if r): cx.append(f'^1:{r}') if contract: cx.append(f"f:{','.join('.'.join(x) for x in contract)}") diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index 3c808746..ab969b21 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -29,7 +29,6 @@ from ..exceptions import EmptyMolecule, EmptyReaction -organic_set = {'B', 'C', 'N', 'O', 'P', 'S', 'Se', 'F', 'Cl', 'Br', 'I'} bond_map = {8: '1" queryType="Any', 4: 'A', 1: '1', 2: '2', 3: '3', 'Any': 8, 'any': 8, 'A': 4, 
'a': 4, '1': 1, '2': 2, '3': 3} diff --git a/chython/files/_mdl/emol.py b/chython/files/_mdl/emol.py index 03b15a6a..a5a5475b 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/_mdl/emol.py @@ -149,13 +149,13 @@ def parse_mol_v3000(data, *, _header=True): drop = True for line in data[3 + atom_count + bonds_count:]: - if line.startswith('M V30 END CTAB'): + if line.startswith('END CTAB'): break elif drop: - if line.startswith('M V30 BEGIN SGROUP'): + if line.startswith('BEGIN SGROUP'): drop = False continue - elif line.startswith('M V30 END SGROUP'): + elif line.startswith('END SGROUP'): break _, _type, i, *kvs = split(line) diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index db819f2b..93913a89 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -122,7 +122,7 @@ def parse_mol_v2000(data): log.append(f'ignored line: {line}') for a in atoms: - if a['is_radical']: # int to bool + if 'is_radical' in a: # int to bool a['is_radical'] = True for x in dat.values(): try: From 6b5d5418cfcf176e30a09a95f41c4faebe85812a Mon Sep 17 00:00:00 2001 From: stsouko Date: Wed, 20 Nov 2024 21:14:35 +0100 Subject: [PATCH 35/51] Refactor container attribute access and remove unused methods Replaced double underscore attributes with single underscore to simplify access. Removed hashed bytes representation method and streamlined `pack` and `unpack` functions across multiple classes by adding shorthand equivalents `pach` and `unpach`. Additionally, updated copyrights and deleted unnecessary import and cache flushes. 
--- chython/algorithms/mapping/attention.py | 3 +- chython/algorithms/smiles.py | 5 - chython/algorithms/standardize/reaction.py | 21 ++-- chython/containers/molecule.py | 13 +++ chython/containers/reaction.py | 119 ++++++++++----------- 5 files changed, 78 insertions(+), 83 deletions(-) diff --git a/chython/algorithms/mapping/attention.py b/chython/algorithms/mapping/attention.py index e8c75ff0..bc4e870a 100644 --- a/chython/algorithms/mapping/attention.py +++ b/chython/algorithms/mapping/attention.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # @@ -33,6 +33,7 @@ class Attention: __slots__ = () + __class_cache__ = {} def reset_mapping(self: Union['ReactionContainer', 'Attention'], *, return_score: bool = False, multiplier=1.75, keep_reactants_numbering=False) -> Union[bool, float]: diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index fecbae0d..5f463035 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -21,7 +21,6 @@ from CachedMethods import cached_method from collections import defaultdict from functools import cached_property -from hashlib import sha512 from heapq import heappop, heappush from itertools import product from random import random @@ -148,10 +147,6 @@ def __eq__(self, other): def __hash__(self): return hash(str(self)) - @cached_method - def __bytes__(self): - return sha512(str(self).encode()).digest() - @cached_property def smiles_atoms_order(self) -> Tuple[int, ...]: """ diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 1cb20f28..8f5ab282 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -272,10 +272,9 @@ def __remove_reagents_rules(self: 'ReactionContainer', keep_reagents): tmp.extend(reagents_st2) reagents = tuple(tmp) if keep_reagents 
else () - self._ReactionContainer__reactants = tuple(reactants_st2) - self._ReactionContainer__products = tuple(products_st2) - self._ReactionContainer__reagents = reagents - self.flush_cache() + self._reactants = tuple(reactants_st2) + self._products = tuple(products_st2) + self._reagents = reagents self.fix_positions() return True @@ -307,10 +306,9 @@ def __remove_reagents_mapping(self: 'ReactionContainer', keep_reagents): reagents = tuple(tmp) if keep_reagents else () if len(reactants) != len(self.reactants) or len(products) != len(self.products) or len(reagents) != len(self.reagents): - self._ReactionContainer__reactants = tuple(reactants) - self._ReactionContainer__products = tuple(products) - self._ReactionContainer__reagents = reagents - self.flush_cache() + self._reactants = tuple(reactants) + self._products = tuple(products) + self._reagents = reagents self.fix_positions() return True return False @@ -327,7 +325,7 @@ def contract_ions(self: 'ReactionContainer') -> bool: salts = _contract_ions(anions, cations, total) if salts: neutral.extend(salts) - self._ReactionContainer__reagents = tuple(neutral) + self._reagents = tuple(neutral) changed = True else: changed = False @@ -338,7 +336,7 @@ def contract_ions(self: 'ReactionContainer') -> bool: anions_order = {frozenset(m): n for n, m in enumerate(anions)} cations_order = {frozenset(m): n for n, m in enumerate(cations)} neutral.extend(salts) - self._ReactionContainer__reactants = tuple(neutral) + self._reactants = tuple(neutral) changed = True else: anions_order = cations_order = {} @@ -350,11 +348,10 @@ def contract_ions(self: 'ReactionContainer') -> bool: salts = _contract_ions(anions, cations, total) if salts: neutral.extend(salts) - self._ReactionContainer__products = tuple(neutral) + self._products = tuple(neutral) changed = True if changed: - self.flush_cache() self.fix_positions() return True return False diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 
e6263811..5ac7fb43 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -527,6 +527,9 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non return compress(data, 9) return data + def pach(self, *, compressed=True, check=True, version=2, order: List[int] = None) -> bytes: + return self.pack(compressed=compressed, check=check, version=version, order=order) + @classmethod def pack_len(cls, data: bytes, /, *, compressed=True) -> int: """ @@ -586,6 +589,16 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, return mol, pack_length return mol + @classmethod + def unpach(cls, data: Union[bytes, memoryview], /, *, compressed=True) -> 'MoleculeContainer': + """ + Unpack from compressed bytes. + """ + return cls.unpack(data, compressed=compressed) + + def __bytes__(self): + return self.pack() + def _cpack(self, order=None, check=True): if order is None: order = list(self._atoms) diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index e34185e8..2d154924 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,11 +18,10 @@ # from CachedMethods import cached_method from functools import reduce -from hashlib import sha512 from itertools import chain from math import ceil from operator import itemgetter, or_ -from typing import Dict, Iterable, Iterator, Optional, Tuple, List +from typing import Dict, Iterator, Optional, Tuple, List, Sequence from zlib import compress, decompress from .cgr import CGRContainer from .molecule import MoleculeContainer @@ -38,11 +37,10 @@ class ReactionContainer(StandardizeReaction, Mapping, Calculate2DReaction, Depic Reaction storage hashable and comparable. 
based on reaction unique signature (SMILES). """ - __slots__ = ('__reactants', '__products', '__reagents', '__meta', '__name', '_arrow', '_signs', '__dict__') - __class_cache__ = {} + __slots__ = ('_reactants', '_products', '_reagents', '_meta', '_name', '_arrow', '_signs', '__dict__') - def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterable[MoleculeContainer] = (), - reagents: Iterable[MoleculeContainer] = (), meta: Optional[Dict] = None, name: Optional[str] = None): + def __init__(self, reactants: Sequence[MoleculeContainer] = (), products: Sequence[MoleculeContainer] = (), + reagents: Sequence[MoleculeContainer] = (), meta: Optional[Dict] = None, name: Optional[str] = None): """ New reaction object creation @@ -60,15 +58,15 @@ def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterab elif not all(isinstance(x, MoleculeContainer) for x in chain(reactants, products, reagents)): raise TypeError(f'MoleculeContainers expected') - self.__reactants = reactants - self.__products = products - self.__reagents = reagents + self._reactants = reactants + self._products = products + self._reagents = reagents if meta is None: - self.__meta = None + self._meta = None else: - self.__meta = dict(meta) + self._meta = dict(meta) if name is None: - self.__name = None + self._name = None else: self.name = name self._arrow = None @@ -76,21 +74,21 @@ def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterab @property def reactants(self) -> Tuple[MoleculeContainer, ...]: - return self.__reactants + return self._reactants @property def reagents(self) -> Tuple[MoleculeContainer, ...]: - return self.__reagents + return self._reagents @property def products(self) -> Tuple[MoleculeContainer, ...]: - return self.__products + return self._products def molecules(self) -> Iterator[MoleculeContainer]: """ Iterator of all reaction molecules """ - return chain(self.__reactants, self.__reagents, self.__products) + return 
chain(self.reactants, self.reagents, self.products) @property def meta(self) -> Dict: @@ -98,33 +96,33 @@ def meta(self) -> Dict: Dictionary of metadata. Like DTYPE-DATUM in RDF """ - if self.__meta is None: - self.__meta = {} # lazy - return self.__meta + if self._meta is None: + self._meta = {} # lazy + return self._meta @property def name(self) -> str: - return self.__name or '' + return self._name or '' @name.setter def name(self, name: str): if not isinstance(name, str): raise TypeError('name should be string up to 80 symbols') - self.__name = name + self._name = name def copy(self) -> 'ReactionContainer': """ Get copy of object """ copy = object.__new__(self.__class__) - copy._ReactionContainer__reactants = tuple(x.copy() for x in self.__reactants) - copy._ReactionContainer__products = tuple(x.copy() for x in self.__products) - copy._ReactionContainer__reagents = tuple(x.copy() for x in self.__reagents) - copy._ReactionContainer__name = self.__name - if self.__meta is None: - copy._ReactionContainer__meta = None + copy._reactants = tuple(x.copy() for x in self.reactants) + copy._products = tuple(x.copy() for x in self.products) + copy._reagents = tuple(x.copy() for x in self.reagents) + copy._name = self._name + if self._meta is None: + copy._meta = None else: - copy._ReactionContainer__meta = self.__meta.copy() + copy._meta = self._meta.copy() copy._arrow = self._arrow copy._signs = self._signs return copy @@ -137,23 +135,23 @@ def compose(self) -> CGRContainer: Reagents will be presented as unchanged molecules :return: CGRContainer """ - rr = self.__reagents + self.__reactants + rr = self.reagents + self.reactants if rr: r = reduce(or_, rr) else: r = MoleculeContainer() - if self.__products: - p = reduce(or_, self.__products) + if self.products: + p = reduce(or_, self.products) else: p = MoleculeContainer() return r ^ p - def flush_cache(self): + def flush_cache(self, **kwargs): self.__dict__.clear() for m in self.molecules(): - m.flush_cache() + 
m.flush_cache(**kwargs) - def pack(self, *, compressed=True, check=True): + def pack(self, *, compressed=True, check=True) -> bytes: """ Pack into compressed bytes. @@ -172,12 +170,18 @@ def pack(self, *, compressed=True, check=True): :param compressed: return zlib-compressed pack. :param check: check molecules for format restrictions. """ - data = b''.join((bytearray((1, len(self.__reactants), len(self.__reagents), len(self.__products))), + data = b''.join((bytearray((1, len(self.reactants), len(self.reagents), len(self.products))), *(m.pack(compressed=False, check=check) for m in self.molecules()))) if compressed: return compress(data, 9) return data + def pach(self, *, compressed=True, check=True) -> bytes: + """ + Pack into compressed bytes. + """ + return self.pack(compressed=compressed, check=check) + @classmethod def pack_len(cls, data: bytes, /, *, compressed=True) -> Tuple[List[int], List[int], List[int]]: """ @@ -225,7 +229,7 @@ def unpack(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': raise ValueError('invalid pack header') reactants, reagents, products = data[1], data[2], data[3] - molecules = [] + molecules: List[MoleculeContainer] = [] shift = 4 for _ in range(reactants + reagents + products): m, pl = MoleculeContainer.unpack(data[shift:], compressed=False, _return_pack_length=True) @@ -233,6 +237,16 @@ def unpack(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': shift += pl return cls(molecules[:reactants], molecules[-products:], molecules[reactants: -products]) + @classmethod + def unpach(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': + """ + Unpack from compressed bytes. 
+ """ + return cls.unpack(data, compressed=compressed) + + def __bytes__(self): + return self.pack() + def __invert__(self) -> CGRContainer: """ Get CGR of reaction @@ -246,15 +260,11 @@ def __eq__(self, other): def __hash__(self): return hash(str(self)) - @cached_method - def __bytes__(self): - return sha512(str(self).encode()).digest() - def __bool__(self): """ Exists both reactants and products """ - return bool(self.__reactants and self.__products) + return bool(self.reactants and self.products) @cached_method def __str__(self): @@ -279,7 +289,7 @@ def __format__(self, format_spec): contract = [] radicals = [] - for ml in (self.__reactants, self.__reagents, self.__products): + for ml in (self.reactants, self.reagents, self.products): mso = [(m, *m.__format__(format_spec, _return_order=True)) for m in ml] if not format_spec or '!c' not in format_spec: mso.sort(key=itemgetter(1)) @@ -306,29 +316,8 @@ def __format__(self, format_spec): return f"{'>'.join(sig)} |{','.join(cx)}|" return '>'.join(sig) - @cached_method def __len__(self): - return len(self.__reactants) + len(self.__products) + len(self.__reagents) - - def __getstate__(self): - state = {'reactants': self.__reactants, 'products': self.__products, 'reagents': self.__reagents, - 'meta': self.__meta, 'name': self.__name, 'arrow': self._arrow, 'signs': self._signs} - from chython import pickle_cache - - if pickle_cache: - state['cache'] = self.__dict__ - return state - - def __setstate__(self, state): - self.__reactants = state['reactants'] - self.__products = state['products'] - self.__reagents = state['reagents'] - self.__meta = state['meta'] - self.__name = state['name'] - self._arrow = state['arrow'] - self._signs = state['signs'] - if 'cache' in state: - self.__dict__.update(state['cache']) + return len(self.reactants) + len(self.products) + len(self.reagents) __all__ = ['ReactionContainer'] From 3ab3629273e3bfbc2bb7271bd88c6aec4382454c Mon Sep 17 00:00:00 2001 From: stsouko Date: Fri, 22 Nov 2024 
08:41:43 +0100 Subject: [PATCH 36/51] saved --- chython/reactor/base.py | 6 +++++- chython/reactor/reactor.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/chython/reactor/base.py b/chython/reactor/base.py index 16f8b918..fae981bc 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -21,7 +21,7 @@ from itertools import product from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement +from ..periodictable import Element, ListElement, AnyElement, QueryElement class BaseReactor: @@ -34,6 +34,10 @@ def __init__(self, reactants, products, delete_atoms, fix_rings, fix_tautomers): self.__variable = variable = [] atoms = defaultdict(dict) + if isinstance(products, MoleculeContainer): + # full replacement of atoms + for n, atom in products.atoms(): + elements[n] = atom.copy(hydrogens=True, stereo=True) for n, atom in products.atoms(): atoms[n].update(charge=atom.charge, is_radical=atom.is_radical) if atom.atomic_number: # replace atom diff --git a/chython/reactor/reactor.py b/chython/reactor/reactor.py index 08cb024c..ce74a6d1 100644 --- a/chython/reactor/reactor.py +++ b/chython/reactor/reactor.py @@ -69,7 +69,7 @@ def __init__(self, patterns: Tuple[QueryContainer, ...], self.__polymerise_limit = polymerise_limit self.__products_atoms = tuple(set(m) for m in products) self.__automorphism_filter = automorphism_filter - super().__init__({n for x in patterns for n, h in x._masked.items() if not h}, reduce(or_, products), + super().__init__({n for x in patterns for n, a in x.atoms() if not a.masked}, reduce(or_, products), delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, *structures: MoleculeContainer): From 41e2b1fcb9f4764d66516001073c772a2b7034c2 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 10:00:36 +0100 Subject: [PATCH 37/51] fixes. removed overoptimizations. 
--- chython/algorithms/depict.py | 2 +- chython/algorithms/fingerprints/__init__.py | 2 +- chython/algorithms/isomorphism.py | 4 ++-- chython/algorithms/mcs.py | 4 ++-- chython/algorithms/morgan.py | 8 ++++---- chython/algorithms/standardize/molecule.py | 18 ++++++++---------- chython/algorithms/standardize/resonance.py | 6 +++--- chython/algorithms/standardize/saturation.py | 8 ++++---- chython/algorithms/stereo.py | 8 ++++---- chython/algorithms/x3dom.py | 4 ++-- chython/containers/graph.py | 4 ++-- chython/containers/molecule.py | 4 ++-- chython/files/_mdl/emol.py | 2 +- chython/files/_mdl/erxn.py | 2 +- chython/files/_mdl/mol.py | 2 +- chython/files/_mdl/rxn.py | 2 +- chython/files/_mdl/write.py | 8 ++++---- chython/files/libinchi/wrapper.py | 4 ++-- 18 files changed, 45 insertions(+), 47 deletions(-) diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 73cf2319..b0819b03 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -351,7 +351,7 @@ def __render_atoms(self: 'MoleculeContainer', uid): define = [] mask = [] - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): x, y = atom.x, -atom.y symbol = atom.atomic_symbol if (symbol != 'C' or atom.charge or atom.is_radical or atom.isotope or carbon diff --git a/chython/algorithms/fingerprints/__init__.py b/chython/algorithms/fingerprints/__init__.py index 0f6febf1..ec2121fa 100644 --- a/chython/algorithms/fingerprints/__init__.py +++ b/chython/algorithms/fingerprints/__init__.py @@ -32,7 +32,7 @@ class Fingerprints(LinearFingerprint, MorganFingerprint): @property def _atom_identifiers(self: 'MoleculeContainer'): return {idx: hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.is_radical)) - for idx, atom in self._atoms.items()} + for idx, atom in self.atoms()} class FingerprintsCGR(LinearFingerprint, MorganFingerprint): diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 2a64bdf8..43586504 100644 --- 
a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -190,7 +190,7 @@ def _cython_compiled_structure(self: 'MoleculeContainer'): bits2 = [] bits3 = [] bits4 = [] - for i, (n, a) in enumerate(self._atoms.items()): + for i, (n, a) in enumerate(self.atoms()): mapping[n] = i numbers.append(n) v2 = 1 << (a.hybridization - 1) @@ -318,7 +318,7 @@ def get_mapping(query, scope): for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope): - for n, a in self._atoms.items(): + for n, a in self.atoms(): if a.stereo is None: continue m = mapping[n] diff --git a/chython/algorithms/mcs.py b/chython/algorithms/mcs.py index 437d2dcf..8e1bf41b 100644 --- a/chython/algorithms/mcs.py +++ b/chython/algorithms/mcs.py @@ -97,10 +97,10 @@ def __get_product(self: 'molecule.MoleculeContainer', other: 'molecule.MoleculeC o_bonds = other._bonds s_equal = defaultdict(list) # equal self atoms - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): s_equal[atom].append(n) p_equal = defaultdict(list) # equal other atoms - for n, atom in other._atoms.items(): + for n, atom in other.atoms(): p_equal[atom].append(n) full_product = {} diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index e200cbc3..c56b5572 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -40,12 +40,12 @@ def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: :return: dict of atom-order pairs """ - if not self._atoms: # for empty containers + if not self: # for empty containers return {} - elif len(self._atoms) == 1: # optimize single atom containers - return dict.fromkeys(self._atoms, 1) + elif len(self) == 1: # optimize single atom containers + return dict.fromkeys(self, 1) ring = self.ring_atoms - return _morgan({n: hash((hash(a), n in ring)) for n, a in self._atoms.items()}, self.int_adjacency) + return _morgan({n: hash((hash(a), n in ring)) for n, a in self.atoms()}, 
self.int_adjacency) @cached_property def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 5f4e5e77..92840c9b 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -55,7 +55,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F h, changed = self.implicify_hydrogens(_fix_stereo=False, logging=True) if fix_tautomers and (logging or keep_kekule): # thiele can change tautomeric form - hgs = {n: a.implicit_hydrogens for n, a in self._atoms.items()} + hgs = {n: a.implicit_hydrogens for n, a in self.atoms()} if keep_kekule: # save bond orders bonds = [(b, b.order) for _, _, b in self.bonds()] @@ -66,7 +66,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F if keep_kekule and t: # restore # check ring charge/hydrogen moving - if c or fix_tautomers and hgs != self._hydrogens: # noqa + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self.atoms()): self.kekule() # we need to do full kekule again else: for b, o in bonds: # noqa @@ -81,12 +81,12 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F s.append((tuple(changed), -1, 'implicified')) if t: s.append(((), -1, 'aromatized')) - if fix_tautomers and (x := tuple(n for n, a in self._atoms.items() if hgs[n] != a.implicit_hydrogens)): + if fix_tautomers and (x := tuple(n for n, a in self.atoms() if hgs[n] != a.implicit_hydrogens)): s.append((x, -1, 'aromatic tautomer found')) if c: s.append((tuple(c), -1, 'recharged')) if keep_kekule and t: - if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self._atoms.items()): + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self.atoms()): s.append(((), -1, 'kekulized again')) else: s.append(((), -1, 'kekule form restored')) @@ -123,7 +123,7 @@ 
def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=Fals log.extend(l) fixed.update(f) - if b := fixed.intersection(n for n, a in self._atoms.items() if a.implicit_hydrogens is None): + if b := fixed.intersection(n for n, a in self.atoms() if a.implicit_hydrogens is None): if ignore: log.append((tuple(b), -1, 'standardization failed')) else: @@ -271,17 +271,15 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, :param keep_to_terminal: Keep any bonds to terminal hydrogens :return: removed bonds count """ - bonds = self._bonds - ab = [(n, m) for n, m, b in self.bonds() if b == 8] if keep_to_terminal: skeleton = self.not_special_connectivity - hs = {n for n, a in self._atoms.items() if a == H and not skeleton[n]} + hs = {n for n, a in self.atoms() if a == H and not skeleton[n]} ab = [(n, m) for n, m in ab if n not in hs and m not in hs] for n, m in ab: - del bonds[n][m], bonds[m][n] + self.delete_bond(n, m, _skip_calculation=True) if ab: self.flush_cache(keep_sssr=True) @@ -404,7 +402,7 @@ def check_valence(self: 'MoleculeContainer') -> List[int]: :return: list of invalid atoms """ # only invalid atoms have None hydrogens. 
- return [n for n, a in self._atoms.items() if a.implicit_hydrogens is None] + return [n for n, a in self.atoms() if a.implicit_hydrogens is None] def clean_isotopes(self: 'MoleculeContainer') -> bool: """ diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 2283540f..593b4ef5 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -135,7 +135,7 @@ def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, def __entries(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds - errors = {n for n, a in atoms.items() if a.implicit_hydrogens is None} + errors = {n for n, a in self.atoms() if a.implicit_hydrogens is None} transfer = set() entries = set() @@ -144,7 +144,7 @@ def __entries(self: 'MoleculeContainer'): nitrogen_cat = set() nitrogen_ani = set() sulfur_cat = set() - for n, a in atoms.items(): + for n, a in self.atoms(): if a not in (B, C, N, O, Si, P, S, As, Se, Te): # filter non-organic set, halogens and aromatics continue @@ -182,7 +182,7 @@ def __entries(self: 'MoleculeContainer'): transfer.add(n) if exits or entries: # try to move cation to nitrogen. saturation fixup. 
- for n, a in self._atoms.items(): + for n, a in self.atoms(): if a == N and not a.charge: if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) diff --git a/chython/algorithms/standardize/saturation.py b/chython/algorithms/standardize/saturation.py index 38c5bb1e..03fa9c1e 100644 --- a/chython/algorithms/standardize/saturation.py +++ b/chython/algorithms/standardize/saturation.py @@ -76,11 +76,11 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, expected_charge = int(self) if reset_electrons: - charges = {x: None for x in self._atoms} - radicals = {x: None for x in self._atoms} + charges = {x: None for x in self} + radicals = {x: None for x in self} else: - charges = {n: a.charge for n, a in self._atoms.items()} - radicals = {n: a.is_radical for n, a in self._atoms.items()} + charges = {n: a.charge for n, a in self.atoms()} + radicals = {n: a.is_radical for n, a in self.atoms()} sat, adjacency = _find_possible_valences(atoms, neighbors_distances or self._bonds, charges, radicals, neighbors_distances is not None) charges = {} # new charge states diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index e243d6a2..1be70e3a 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -168,7 +168,7 @@ def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: Carbon sp3 atom numbers. 
""" tetra = [] - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): if atom == C and not atom.charge and not atom.is_radical: env = self._bonds[n] if all(b == 1 for b in env.values()): @@ -577,7 +577,7 @@ def fix_stereo(self: 'MoleculeContainer'): atoms_stereo = [] allenes_stereo = [] cis_trans_stereo = [] - for n, a in self._atoms.items(): + for n, a in self.atoms(): if a.stereo is None: continue elif n in stereo_tetrahedrons: @@ -960,7 +960,7 @@ def __wedge_sign(self: 'MoleculeContainer', order): @cached_property def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: - stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} if not stereo_atoms and not stereo_bonds: return self.atoms_order @@ -1103,7 +1103,7 @@ def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): chiral_c.add(n) # skip already marked. 
- stereo_atoms = {n for n, a in self._atoms.items() if a.stereo is not None} + stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} chiral_t.difference_update(stereo_atoms) chiral_a.difference_update(stereo_atoms) diff = set() diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index 9d59160d..73779280 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -181,7 +181,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): atoms = [] if carbon: - for n, a in self._atoms.items(): + for n, a in self.atoms(): r = radius or a.atomic_radius * multiplier fr = r * 0.71 atoms.append(f" \n" @@ -197,7 +197,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): f" \n \n" " \n \n \n \n") else: - for n, a in self._atoms.items(): + for n, a in self.atoms(): r = radius or a.atomic_radius * multiplier atoms.append(f" \n" " \n \n" diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 4586969e..f644ebb6 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -122,7 +122,7 @@ def copy(self): copy of graph """ copy = object.__new__(self.__class__) - copy._atoms = {n: atom.copy(full=True) for n, atom in self._atoms.items()} + copy._atoms = {n: atom.copy(full=True) for n, atom in self.atoms()} copy._bonds = cb = {} for n, m_bond in self._bonds.items(): cb[n] = cbn = {} @@ -144,7 +144,7 @@ def remap(self, mapping: Dict[int, int]): raise ValueError('mapping overlap') mg = mapping.get - self._atoms = {mg(n, n): atom for n, atom in self._atoms.items()} + self._atoms = {mg(n, n): atom for n, atom in self.atoms()} self._bonds = {mg(n, n): {mg(m, m): bond for m, bond in m_bond.items()} for n, m_bond in self._bonds.items()} self.flush_cache() diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 5ac7fb43..a8bd6094 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -292,11 +292,11 @@ def substructure(self, atoms: Iterable[int], *, 
as_query: bool = False, recalcul raise ValueError('empty atoms list not allowed') if set(atoms) - self._atoms.keys(): raise ValueError('invalid atom numbers') - atoms = tuple(n for n in self._atoms if n in atoms) # save original order + atoms = tuple(n for n in self if n in atoms) # save original order if as_query: sub = object.__new__(QueryContainer) - lost = {n for n, a in self._atoms.items() if a != H} - set(atoms) # atoms not in substructure + lost = {n for n, a in self.atoms() if a != H} - set(atoms) # atoms not in substructure # atoms with fully present neighbors not_skin = {n for n in atoms if lost.isdisjoint(self._bonds[n])} diff --git a/chython/files/_mdl/emol.py b/chython/files/_mdl/emol.py index a5a5475b..e8390a9c 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/_mdl/emol.py @@ -21,7 +21,7 @@ def parse_mol_v3000(data, *, _header=True): if _header: - title = data[1].strip() or None + title = data[0].strip() or None data = data[4:] else: title = None diff --git a/chython/files/_mdl/erxn.py b/chython/files/_mdl/erxn.py index 6b707b3e..d088cabe 100644 --- a/chython/files/_mdl/erxn.py +++ b/chython/files/_mdl/erxn.py @@ -29,7 +29,7 @@ def parse_rxn_v3000(data, *, ignore=True): if not reagents_count: raise EmptyReaction - title = data[2].strip() or None + title = data[1].strip() or None log = [] molecules = [] diff --git a/chython/files/_mdl/mol.py b/chython/files/_mdl/mol.py index 93913a89..88b21373 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/_mdl/mol.py @@ -32,7 +32,7 @@ def parse_mol_v2000(data): raise EmptyMolecule log = [] - title = data[1].strip() or None + title = data[0].strip() or None atoms = [] bonds = [] stereo = [] diff --git a/chython/files/_mdl/rxn.py b/chython/files/_mdl/rxn.py index 50df40e4..56977fe1 100644 --- a/chython/files/_mdl/rxn.py +++ b/chython/files/_mdl/rxn.py @@ -29,7 +29,7 @@ def parse_rxn_v2000(data, *, ignore=True): if not reagents_count: raise EmptyReaction - title = data[2].strip() or None + title = 
data[1].strip() or None log = [] molecules = [] diff --git a/chython/files/_mdl/write.py b/chython/files/_mdl/write.py index 3319c60d..a998251e 100644 --- a/chython/files/_mdl/write.py +++ b/chython/files/_mdl/write.py @@ -82,7 +82,7 @@ def _write_molecule(self, g, write3d=None): file = self._file file.write(f'M V30 BEGIN CTAB\nM V30 COUNTS {g.atoms_count} {g.bonds_count} 0 0 0\nM V30 BEGIN ATOM\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (m, a) in enumerate(g.atoms(), start=1): if write3d is not None: x, y, z = xyz[m] z = f'{z:.4f}' @@ -131,7 +131,7 @@ def _write_molecule(self, g, write3d=None): file = self._file file.write(f'{g.name}\n\n\n{g.atoms_count:3d}{g.bonds_count:3d} 0 0 0 0 999 V2000\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (m, a) in enumerate(g.atoms(), start=1): if write3d is not None: x, y, z = xyz[m] else: @@ -142,7 +142,7 @@ def _write_molecule(self, g, write3d=None): m = 0 file.write(f'{x:10.4f}{y:10.4f}{z:10.4f} {a.atomic_symbol:3s} 0{c} 0 0 0 0 0 0 0{m:3d} 0 0\n') - atoms = {m: n for n, m in enumerate(g._atoms, start=1)} + atoms = {m: n for n, m in enumerate(g, start=1)} wedge = defaultdict(set) for n, m, s in g._wedge_map: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {bonds[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') @@ -152,7 +152,7 @@ def _write_molecule(self, g, write3d=None): if m not in wedge[n]: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {b.order} 0 0 0 0\n') - for n, a in enumerate(g._atoms.values(), start=1): + for n, (_, a) in enumerate(g.atoms(), start=1): if a.isotope: file.write(f'M ISO 1 {n:3d} {a.isotope:3d}\n') if a.is_radical: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 215a2ba7..8d583fb6 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -201,8 +201,8 @@ def isotope(self): @property def delta_isotope(self): - if self.isotope > 9000: - return self.isotope - 10_000 + if self.isotopic_mass > 9000: + 
return self.isotopic_mass - 10_000 @property def is_radical(self): From 41e1486778d58677104a68060c84261ae3267142 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 10:52:18 +0100 Subject: [PATCH 38/51] removed overoptimizations. --- chython/algorithms/depict.py | 26 ++++++++++------------ chython/algorithms/isomorphism.py | 4 ++-- chython/algorithms/standardize/molecule.py | 2 +- chython/algorithms/stereo.py | 7 +++--- chython/containers/molecule.py | 10 ++++----- chython/utils/grid.py | 23 ++++++++++--------- chython/utils/retro.py | 19 ++++++++-------- 7 files changed, 45 insertions(+), 46 deletions(-) diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index b0819b03..5b00406b 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -206,17 +206,16 @@ def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, he :param clean2d: calculate coordinates if necessary. """ uid = str(uuid4()) - atoms = self._atoms.values() - min_x = min(a.x for a in atoms) - max_x = max(a.x for a in atoms) - min_y = min(a.y for a in atoms) - max_y = max(a.y for a in atoms) + min_x = min(a.x for _, a in self.atoms()) + max_x = max(a.x for _, a in self.atoms()) + min_y = min(a.y for _, a in self.atoms()) + max_y = max(a.y for _, a in self.atoms()) if clean2d and len(self) > 1 and max_y - min_y < .01 and max_x - min_x < 0.01: self.clean2d() - min_x = min(a.x for a in atoms) - max_x = max(a.x for a in atoms) - min_y = min(a.y for a in atoms) - max_y = max(a.y for a in atoms) + min_x = min(a.x for _, a in self.atoms()) + max_x = max(a.x for _, a in self.atoms()) + min_y = min(a.y for _, a in self.atoms()) + max_y = max(a.y for _, a in self.atoms()) bonds = self.__render_bonds() atoms, define, masks = self.__render_atoms(uid) @@ -455,11 +454,10 @@ def depict(self: 'ReactionContainer', *, width=None, height=None, clean2d: bool if clean2d: for m in self.molecules(): if len(m) > 1: - atoms = m._atoms.values() - 
min_x = min(a.x for a in atoms) - max_x = max(a.x for a in atoms) - min_y = min(a.y for a in atoms) - max_y = max(a.y for a in atoms) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() self.fix_positions() diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 43586504..23257e18 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -128,8 +128,8 @@ def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): Atom in Structure test. """ if isinstance(other, str): - return any(other == x.atomic_symbol for x in self._atoms.values()) - return any(other == x for x in self._atoms.values()) + return any(other == a.atomic_symbol for _, a in self.atoms()) + return any(other == a for _, a in self.atoms()) def is_automorphic(self): """ diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 92840c9b..65ce7e85 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -409,7 +409,7 @@ def clean_isotopes(self: 'MoleculeContainer') -> bool: Clean isotope marks from molecule. Return True if any isotope found. """ - isotopes = [x for x in self._atoms.values() if x.isotope] + isotopes = [a for _, a in self.atoms() if a.isotope] if isotopes: for i in isotopes: i._isotope = None diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index 1be70e3a..fd19fa75 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -155,11 +155,10 @@ def clean_stereo(self: 'MoleculeContainer'): """ Remove stereo data. 
""" - for a in self._atoms.values(): + for _, a in self.atoms(): a._stereo = None - for _, bs in self._bonds: - for b in bs.values(): - b._stereo = None # flush twice, but it should be still faster + for *_, b in self.bonds(): + b._stereo = None self.flush_cache(keep_sssr=True, keep_components=True) @cached_property diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index a8bd6094..984f845c 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -128,25 +128,25 @@ def molecular_charge(self) -> int: """ Total charge of molecule """ - return sum(a.charge for a in self._atoms.values()) + return sum(a.charge for _, a in self.atoms()) @cached_property def is_radical(self) -> bool: """ True if at least one atom is radical """ - return any(a.is_radical for a in self._atoms.values()) + return any(a.is_radical for _, a in self.atoms()) @cached_property def molecular_mass(self) -> float: h = _H().atomic_mass - return sum(a.atomic_mass + a.implicit_hydrogens * h for a in self._atoms.values()) + return sum(a.atomic_mass + a.implicit_hydrogens * h for _, a in self.atoms()) @cached_property def brutto(self) -> Dict[str, int]: """Counted atoms dict""" - c = Counter(a.atomic_symbol for a in self._atoms.values()) - c['H'] += sum(a.implicit_hydrogens for a in self._atoms.values()) + c = Counter(a.atomic_symbol for _, a in self.atoms()) + c['H'] += sum(a.implicit_hydrogens for _, a in self.atoms()) return dict(c) @cached_property diff --git a/chython/utils/grid.py b/chython/utils/grid.py index cc15d718..1a771718 100644 --- a/chython/utils/grid.py +++ b/chython/utils/grid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. 
# @@ -52,11 +52,10 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] if clean2d: for m in molecules: if len(m) > 1: - values = m._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() @@ -65,12 +64,12 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] for m in ms: if m is None: break - min_y = min(y for x, y in m._plane.values()) - max_y = max(y for x, y in m._plane.values()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) h = max_y - min_y if row_height < h: # get height of row row_height = h - planes.append(m._plane.copy()) + planes.append([a.xy for _, a in m.atoms()]) max_x = 0. for m in ms: @@ -88,8 +87,10 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] shift_y -= row_height + 4. * font_size # restore planes - for p, m in zip(planes, molecules): - m._plane = p + for m, p in zip(molecules, planes): + for (_, a), (x, y) in zip(m.atoms(), p): + a.x = x + a.y = y _width = shift_x - 1.5 * font_size _height = -shift_y - 1.5 * font_size diff --git a/chython/utils/retro.py b/chython/utils/retro.py index d94ec666..8fa1aaec 100644 --- a/chython/utils/retro.py +++ b/chython/utils/retro.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # Copyright 2021 Alexander Sizov # This file is part of chython. 
# @@ -66,22 +66,21 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle if clean2d: for m in column: if len(m) > 1: - values = m._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() - heights = [max(y for _, y in m._plane.values()) - min(y for _, y in m._plane.values()) for m in column] + heights = [max(a.y for _, a in m.atoms()) - min(a.y for _, a in m.atoms()) for m in column] y_shift = sum(heights) + y_gap * (len(heights) - 1) # column height with gaps if y_shift > c_max_y: c_max_y = y_shift y_shift /= 2. # center align for m, h in zip(column, heights): - plane = m._plane.copy() # backup + plane = [a.xy for _, a in m.atoms()] # backup mx = m._fix_plane_min(x_shift, -y_shift) if mx > c_max_x: c_max_x = mx @@ -92,7 +91,9 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle y_shift -= h + y_gap render.append(m.depict(_embedding=True)[:5]) - m._plane = plane # restore + for (_, a), (x, y) in zip(m.atoms(), plane): # restore + a.x = x + a.y = y x_shift = c_max_x + x_gap # between columns gap last_layer = current_layer From 534c983eeb62ef8ef9f37a1b63badfea01750ee2 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 10:59:34 +0100 Subject: [PATCH 39/51] fixed FWA --- chython/utils/free_wilson.py | 38 ++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/chython/utils/free_wilson.py b/chython/utils/free_wilson.py index e836aa6d..9ee415c3 100644 --- a/chython/utils/free_wilson.py +++ b/chython/utils/free_wilson.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil 
Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -42,31 +42,31 @@ def fw_prepare_groups(core: Union[MoleculeContainer, QueryContainer], molecule: cs = set(core_map.values()) groups = molecule.substructure(molecule._atoms.keys() - cs, recalculate_hydrogens=False) gs = set(groups) - hs = molecule._hydrogens - hgs = groups._hydrogens - plane = molecule._plane cf = molecule.substructure(cs, recalculate_hydrogens=False) - chs = cf._hydrogens for n, m, b in molecule.bonds(): if n in cs: if m in gs: - h = H() - h._Core__isotope = reverse[n] # mark mapping to isotope - groups.add_bond(groups.add_atom(h, xy=plane[n]), m, b.copy()) - hgs[m] = hs[m] # restore H count - - cf.add_bond(cf.add_atom(h.copy(), xy=plane[m]), n, b.copy()) - chs[n] = hs[n] + a = molecule.atom(n) + h = H(x=a.x, y=a.y) + h._isotope = reverse[n] # mark mapping to isotope + groups.add_bond(groups.add_atom(h, _skip_calculation=True), m, b.copy(), _skip_calculation=True) + + a = molecule.atom(m) + h = H(x=a.x, y=a.y) + h._isotope = reverse[n] # mark mapping to isotope + cf.add_bond(cf.add_atom(h, _skip_calculation=True), n, b.copy(), _skip_calculation=True) elif m in cs and n in gs: - h = H() - h._Core__isotope = reverse[m] - groups.add_bond(groups.add_atom(h, xy=plane[m]), n, b.copy()) - hgs[n] = hs[n] - - cf.add_bond(cf.add_atom(h.copy(), xy=plane[n]), m, b.copy()) - chs[m] = hs[m] + a = molecule.atom(m) + h = H(x=a.x, y=a.y) + h._isotope = reverse[m] + groups.add_bond(groups.add_atom(h, _skip_calculation=True), n, b.copy(), _skip_calculation=True) + + a = molecule.atom(n) + h = H(x=a.x, y=a.y) + h._isotope = reverse[m] # mark mapping to isotope + cf.add_bond(cf.add_atom(h.copy(), _skip_calculation=True), n, b.copy(), _skip_calculation=True) groups = groups.split() groups.insert(0, cf) return groups From f3e302dbb20f28f02b9093c332da228643da7837 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 11:02:06 +0100 Subject: [PATCH 
40/51] cleaning --- chython/exceptions.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/chython/exceptions.py b/chython/exceptions.py index 891340fc..6f47d503 100644 --- a/chython/exceptions.py +++ b/chython/exceptions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -66,30 +66,6 @@ class InvalidAromaticRing(ValueError): """ -class IsConnectedAtom(Exception): - """ - Atom is already attached to graph - """ - - -class IsNotConnectedAtom(Exception): - """ - Atom is not attached to graph - """ - - -class IsConnectedBond(Exception): - """ - Bond is already attached to graph - """ - - -class IsNotConnectedBond(Exception): - """ - Bond is not attached to graph - """ - - class ValenceError(Exception): """ Atom has valence error From a12b4b35fe0e952331603a02094cf86372d02144 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 21:20:13 +0100 Subject: [PATCH 41/51] fixes. 
better cache management --- chython/algorithms/standardize/reaction.py | 16 ++++++++-------- chython/containers/bonds.py | 4 ++-- chython/containers/graph.py | 2 ++ chython/containers/reaction.py | 7 ++++--- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 8f5ab282..e6e5ddff 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -50,7 +50,7 @@ def canonicalize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) if logging: return total return bool(total) @@ -76,7 +76,7 @@ def standardize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging= total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) if logging: return total return bool(total) @@ -93,7 +93,7 @@ def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: if m.thiele(fix_tautomers=fix_tautomers): total = True if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: @@ -108,7 +108,7 @@ def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: if m.kekule(buffer_size=buffer_size): total = True if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def clean_isotopes(self: 'ReactionContainer') -> bool: @@ -121,7 +121,7 @@ def clean_isotopes(self: 'ReactionContainer') -> bool: if m.clean_isotopes(): flag = True if flag: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return flag def clean_stereo(self: 'ReactionContainer'): @@ -130,7 +130,7 @@ def clean_stereo(self: 'ReactionContainer'): """ for m in 
self.molecules(): m.clean_stereo() - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) def check_valence(self: 'ReactionContainer') -> List[Tuple[int, Tuple[int, ...]]]: """ @@ -155,7 +155,7 @@ def implicify_hydrogens(self: 'ReactionContainer') -> int: for m in self.molecules(): total += m.implicify_hydrogens() if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def explicify_hydrogens(self: 'ReactionContainer') -> int: @@ -203,7 +203,7 @@ def explicify_hydrogens(self: 'ReactionContainer') -> int: m.remap(remap) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def remove_reagents(self, *, keep_reagents: bool = False, mapping: bool = True) -> bool: diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index 43847d51..76e408ce 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -22,13 +22,13 @@ class Bond: __slots__ = ('_order', '_in_ring', '_stereo') - def __init__(self, order: int): + def __init__(self, order: int, *, stereo: Optional[bool] = None): if not isinstance(order, int): raise TypeError('invalid order value') elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') self._order = order - self._stereo = None + self._stereo = stereo def __eq__(self, other): if isinstance(other, int): diff --git a/chython/containers/graph.py b/chython/containers/graph.py index f644ebb6..51fb0412 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -165,6 +165,8 @@ def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): u = self.copy() if copy else self u._atoms.update(other._atoms) u._bonds.update(other._bonds) + if not copy: + self.flush_cache() return u def flush_cache(self): diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index 2d154924..ca13c5e5 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ 
-146,10 +146,11 @@ def compose(self) -> CGRContainer: p = MoleculeContainer() return r ^ p - def flush_cache(self, **kwargs): + def flush_cache(self, keep_molecule_cache=False, **kwargs): self.__dict__.clear() - for m in self.molecules(): - m.flush_cache(**kwargs) + if not keep_molecule_cache: + for m in self.molecules(): + m.flush_cache(**kwargs) def pack(self, *, compressed=True, check=True) -> bytes: """ From 057d615e2b967fa941ba0352ef3a4f4a48c8acdc Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Nov 2024 22:30:13 +0100 Subject: [PATCH 42/51] saved --- chython/algorithms/isomorphism.py | 6 +- chython/reactor/base.py | 281 ++++++++++++++---------------- chython/reactor/transformer.py | 3 +- 3 files changed, 135 insertions(+), 155 deletions(-) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 23257e18..dc062591 100644 --- a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -295,10 +295,10 @@ def get_mapping(query, scope): array('I', [n in scope for n in other])) else: components = get_mapping = None + yield from self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, + components=components, get_mapping=get_mapping) + return # todo: implement stereo - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, - components=components, get_mapping=get_mapping) - atoms_stereo = self._atoms_stereo allenes_stereo = self._allenes_stereo cis_trans_stereo = self._cis_trans_stereo diff --git a/chython/reactor/base.py b/chython/reactor/base.py index fae981bc..ca128cbf 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -19,189 +19,170 @@ # from collections import defaultdict from itertools import product +from typing import Union from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement, 
QueryElement +from ..periodictable import Element, ListElement, AnyElement, QueryElement, AnyMetal class BaseReactor: - def __init__(self, reactants, products, delete_atoms, fix_rings, fix_tautomers): - self.__to_delete = reactants.difference(products) if delete_atoms else () - - # prepare atoms patch - self.__elements = elements = {} - self.__hydrogens = hydrogens = {} - self.__variable = variable = [] - - atoms = defaultdict(dict) - if isinstance(products, MoleculeContainer): - # full replacement of atoms - for n, atom in products.atoms(): - elements[n] = atom.copy(hydrogens=True, stereo=True) - for n, atom in products.atoms(): - atoms[n].update(charge=atom.charge, is_radical=atom.is_radical) - if atom.atomic_number: # replace atom - elements[n] = Element.from_atomic_number(atom.atomic_number)(atom.isotope) - if n not in reactants and isinstance(products, MoleculeContainer): - atoms[n]['xy'] = atom.xy - if atom.implicit_hydrogens is not None: - hydrogens[n] = atom.implicit_hydrogens # save available H count - elif n not in reactants: - if not isinstance(atom, ListElement): - raise ValueError('New atom should be defined') - elements[n] = [Element.from_symbol(x)() for x in atom._elements] - variable.append(n) - else: # use atom from reactant - if not isinstance(atom, AnyElement): - raise ValueError('Only AnyElement can be used for matched atom propagation') - elements[n] = None - - if isinstance(products, QueryContainer): - bonds = [] - for n, m, b in products.bonds(): + def __init__(self, pattern, replacement, delete_atoms, fix_rings, fix_tautomers): + if isinstance(replacement, QueryContainer): + for n, a in replacement.atoms(): + if not isinstance(a, (AnyElement, QueryElement)): + raise TypeError('Unsupported query atom type') + for *_, b in replacement.bonds(): if len(b.order) > 1: - raise ValueError('bond list in patch not supported') - else: - bonds.append((n, m, Bond(b.order[0]))) - else: - bonds = [(n, m, b.copy()) for n, m, b in products.bonds()] + raise 
ValueError('Variable bond in replacement') - self.__bonds = bonds - self.__atom_attrs = dict(atoms) - self.__products = products - self.__fix_rings = fix_rings - self.__fix_tautomers = fix_tautomers + self._to_delete = {n for n, a in pattern.atoms() if not a.masked} - set(replacement) if delete_atoms else () + self._replacement = replacement + self._fix_rings = fix_rings + self._fix_tautomers = fix_tautomers def _patcher(self, structure: MoleculeContainer, mapping): - elements = self.__elements - variable = self.__variable - - new = self.__prepare_skeleton(structure, mapping) - self.__set_stereo(new, structure, mapping) + new = self._prepare_skeleton(structure, mapping) + self._fix_stereo(new, structure, mapping) - if not variable: - if self.__fix_rings: - new.kekule() # keeps stereo as is - if not new.thiele(fix_tautomers=self.__fix_tautomers): # fixes stereo if any ring aromatized - new.fix_stereo() - else: + if self._fix_rings: + new.kekule() # keeps stereo as is + if not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized new.fix_stereo() - yield new else: - copy = new.copy() - if self.__fix_rings: - copy.kekule() - if not copy.thiele(fix_tautomers=self.__fix_tautomers): - copy.fix_stereo() - else: - copy.fix_stereo() - yield copy + new.fix_stereo() + yield new + + def _get_deleted(self, structure, mapping): + if not self._to_delete: + return set() - for atoms in product(*(elements[x][1:] for x in variable)): - copy = new.copy() - for n, atom in zip(variable, atoms): - n = mapping[n] - # replace atom - copy._atoms[n] = a = atom.copy() # noqa - a._attach_graph(copy, n) # noqa - copy.calc_implicit(n) # noqa - if self.__fix_rings: - copy.kekule() - if not copy.thiele(fix_tautomers=self.__fix_tautomers): - copy.fix_stereo() - else: - copy.fix_stereo() + bonds = structure._bonds + to_delete = {mapping[x] for x in self._to_delete} + # if deleted atoms have another path to remain fragment, the path is preserved + remain = 
set(mapping.values()).difference(to_delete) + delete, global_seen = set(), set() + for x in to_delete: + for n in bonds[x]: + if n in global_seen or n in remain: + continue + seen = {n} + global_seen.add(n) + stack = [x for x in bonds[n] if x not in global_seen] + while stack: + current = stack.pop() + if current in remain: + break + if current in to_delete: + continue + seen.add(current) + global_seen.add(current) + stack.extend([x for x in bonds[current] if x not in global_seen]) else: - copy.fix_stereo() - yield copy + delete.update(seen) - def __prepare_skeleton(self, structure, mapping): - elements = self.__elements - patch_hydrogens = self.__hydrogens - patch_bonds = self.__bonds - variable = self.__variable + to_delete.update(delete) + return to_delete + def _prepare_skeleton(self, structure, mapping): atoms = structure._atoms - plane = structure._plane bonds = structure._bonds - charges = structure._charges - radicals = structure._radicals - hydrogens = structure._hydrogens - - to_delete = {mapping[x] for x in self.__to_delete} - if to_delete: - # if deleted atoms have another path to remain fragment, the path is preserved - remain = set(mapping.values()).difference(to_delete) - delete, global_seen = set(), set() - for x in to_delete: - for n in bonds[x]: - if n in global_seen or n in remain: - continue - seen = {n} - global_seen.add(n) - stack = [x for x in bonds[n] if x not in global_seen] - while stack: - current = stack.pop() - if current in remain: - break - if current in to_delete: - continue - seen.add(current) - global_seen.add(current) - stack.extend([x for x in bonds[current] if x not in global_seen]) - else: - delete.update(seen) - - to_delete.update(delete) + to_delete = self._get_deleted(structure, mapping) new = structure.__class__() - keep_hydrogens = {} + natoms = new._atoms + nbonds = new._bonds max_atom = max(atoms) - for n, atom in self.__atom_attrs.items(): - if n in mapping: # add matched atoms - m = mapping[n] - e = elements[n] - if e 
is None: - e = atoms[m] - new.add_atom(e.copy(), m, xy=plane[m], _skip_hydrogen_calculation=True, **atom) - else: # new atoms - max_atom += 1 - if n in variable: - # use first from the list - mapping[n] = new.add_atom(elements[n][0].copy(), max_atom, _skip_hydrogen_calculation=True, **atom) + stereo_atoms = [] + stereo_bonds = [] + + for n, a in self._replacement.atoms(): + if isinstance(a, AnyElement): + if n := mapping.get(n): + # keep matched atom type and isotope + e = atoms[n].copy(stereo=True) + e.charge = a.charge + e.is_radical = a.is_radical + if a.stereo is not None: # override stereo + e._stereo = a.stereo + elif e.stereo is not None: # keep original stereo + stereo_atoms.append(n) # mark for stereo fix + natoms[n] = e + nbonds[n] = {} + else: + raise ValueError("AnyElement doesn't match to pattern") + else: # QueryElement or Element + a: Union[QueryElement, Element] # typehint + e = Element.from_atomic_number(a.atomic_number) + e = e(a.isotope, charge=a.charge, is_radical=a.is_radical, stereo=a.stereo) + if not (m := mapping.get(n)): # new atom + m = max_atom + 1 + max_atom += 1 + mapping[n] = m + if isinstance(a, Element): + e._implicit_hydrogens = a.implicit_hydrogens # keep H count from patch + e.x = a.x # keep coordinates from patch + e.y = a.y + elif len(a.implicit_hydrogens) == 1: + e._implicit_hydrogens = a.implicit_hydrogens[0] + elif a.implicit_hydrogens: + raise ValueError('Query element in patch has more than one implicit hydrogen') + else: # existing atoms + b = atoms[m] + e.x = b.x # preserve existing coordinates + e.y = b.y + if a.stereo is None and b.stereo is not None: # keep original stereo + e._stereo = b.stereo + stereo_atoms.append(m) + natoms[m] = e + nbonds[m] = {} + + # preserve connectivity order + for n, bs in self._replacement._bonds.items(): + n = mapping[n] + for m, b in bs.items(): + m = mapping[m] + if n in nbonds[m]: + nbonds[n][m] = nbonds[m][n] else: - mapping[n] = new.add_atom(elements[n].copy(), max_atom, 
_skip_hydrogen_calculation=True, **atom) - if n in patch_hydrogens: # keep patch aromatic atoms hydrogens count - keep_hydrogens[max_atom] = patch_hydrogens[n] + nbonds[n][m] = b = Bond(int(b), stereo=b.stereo) + if b.stereo is None: + if not (nb := bonds.get(n)): + continue + if not (mb := nb.get(m)): + continue + if mb.stereo is None: + continue + # original structure has stereo bond + b._stereo = mb.stereo + stereo_bonds.append((n, m)) patch_atoms = set(new) # don't move! - for n, atom in structure.atoms(): # add unmatched atoms + for n, a in atoms.items(): # add unmatched or masked atoms if n not in patch_atoms and n not in to_delete: - new.add_atom(atom.copy(), n, charge=charges[n], is_radical=radicals[n], xy=plane[n], - _skip_hydrogen_calculation=True) - keep_hydrogens[n] = hydrogens[n] # keep hydrogens on unmatched atoms as is. - - for n, m, bond in patch_bonds: # add patch bonds - new.add_bond(mapping[n], mapping[m], bond.copy(), _skip_hydrogen_calculation=True) + natoms[n] = a.copy(hydrogens=True, stereo=True) + nbonds[n] = {} - for n, m_bond in bonds.items(): + for n, bs in bonds.items(): if n in to_delete: # atoms for removing continue - to_delete.add(n) # reuse to_delete set for seen atoms - for m, bond in m_bond.items(): + for m, b in bs.items(): # ignore deleted atoms and patch atoms if m in to_delete or n in patch_atoms and m in patch_atoms: continue - new.add_bond(n, m, bond.copy(), _skip_hydrogen_calculation=True) - - # fix hydrogens count. 
- new._hydrogens.update(keep_hydrogens) # noqa - for n in new: - if n not in keep_hydrogens: - new.calc_implicit(n) # noqa + elif n in nbonds[m]: + nbonds[n][m] = nbonds[m][n] + else: + nbonds[n][m] = b.copy(stereo=True) + if b.stereo is not None and (n in patch_atoms or m in patch_atoms): + stereo_bonds.append((n, m)) + + for n, a in new.atoms(): + if a.implicit_hydrogens is None: + new.calc_implicit(n) + new.calc_labels() return new - def __set_stereo(self, new, structure, mapping): + def _fix_stereo(self, new, structure, mapping): products = self.__products stereo_override = set() r_mapping = {m: n for n, m in mapping.items()} diff --git a/chython/reactor/transformer.py b/chython/reactor/transformer.py index d2be81e7..5852cc47 100644 --- a/chython/reactor/transformer.py +++ b/chython/reactor/transformer.py @@ -47,8 +47,7 @@ def __init__(self, pattern: QueryContainer, replacement: Union[MoleculeContainer self.replacement = replacement self.__automorphism_filter = automorphism_filter self.__copy_metadata = copy_metadata - super().__init__({n for n, h in pattern._masked.items() if not h}, replacement, delete_atoms, - fix_aromatic_rings, fix_tautomers) + super().__init__(pattern, replacement, delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, structure: MoleculeContainer): if not isinstance(structure, MoleculeContainer): From f294c3c44f644eab3c0db730374df5656ee3a923 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 17:52:47 +0100 Subject: [PATCH 43/51] Refactor: Update mdl module import paths Renamed '_mdl' directory to 'mdl' and updated all corresponding import statements. This change improves code readability and aligns directory naming conventions across the project. 
--- chython/files/MRVrw.py | 2 +- chython/files/RDFrw.py | 4 ++-- chython/files/SDFrw.py | 2 +- chython/files/{_mdl => mdl}/__init__.py | 0 chython/files/{_mdl => mdl}/emol.py | 0 chython/files/{_mdl => mdl}/erxn.py | 0 chython/files/{_mdl => mdl}/mol.py | 0 chython/files/{_mdl => mdl}/read.py | 0 chython/files/{_mdl => mdl}/rxn.py | 0 chython/files/{_mdl => mdl}/stereo.py | 0 chython/files/{_mdl => mdl}/write.py | 0 11 files changed, 4 insertions(+), 4 deletions(-) rename chython/files/{_mdl => mdl}/__init__.py (100%) rename chython/files/{_mdl => mdl}/emol.py (100%) rename chython/files/{_mdl => mdl}/erxn.py (100%) rename chython/files/{_mdl => mdl}/mol.py (100%) rename chython/files/{_mdl => mdl}/read.py (100%) rename chython/files/{_mdl => mdl}/rxn.py (100%) rename chython/files/{_mdl => mdl}/stereo.py (100%) rename chython/files/{_mdl => mdl}/write.py (100%) diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index ab969b21..543f33dd 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -24,7 +24,7 @@ from typing import Union, List, Iterator, Dict, Optional from ._convert import create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction -from ._mdl import postprocess_molecule +from .mdl import postprocess_molecule from ..containers import MoleculeContainer, ReactionContainer from ..exceptions import EmptyMolecule, EmptyReaction diff --git a/chython/files/RDFrw.py b/chython/files/RDFrw.py index 62bebbae..9e8a20f2 100644 --- a/chython/files/RDFrw.py +++ b/chython/files/RDFrw.py @@ -25,8 +25,8 @@ from sys import platform from time import strftime from typing import Union, Dict, List -from ._mdl import (MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, parse_rxn_v2000, parse_rxn_v3000, - postprocess_molecule) +from .mdl import (MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, parse_rxn_v2000, parse_rxn_v3000, + postprocess_molecule) from ._convert import 
create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction from ..containers import ReactionContainer, MoleculeContainer diff --git a/chython/files/SDFrw.py b/chython/files/SDFrw.py index 04edb0ad..232f3fe6 100644 --- a/chython/files/SDFrw.py +++ b/chython/files/SDFrw.py @@ -23,7 +23,7 @@ from subprocess import check_output from sys import platform from typing import Optional, List -from ._mdl import MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, postprocess_molecule +from .mdl import MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, postprocess_molecule from ._convert import create_molecule from ._mapping import postprocess_parsed_molecule from ..containers import MoleculeContainer diff --git a/chython/files/_mdl/__init__.py b/chython/files/mdl/__init__.py similarity index 100% rename from chython/files/_mdl/__init__.py rename to chython/files/mdl/__init__.py diff --git a/chython/files/_mdl/emol.py b/chython/files/mdl/emol.py similarity index 100% rename from chython/files/_mdl/emol.py rename to chython/files/mdl/emol.py diff --git a/chython/files/_mdl/erxn.py b/chython/files/mdl/erxn.py similarity index 100% rename from chython/files/_mdl/erxn.py rename to chython/files/mdl/erxn.py diff --git a/chython/files/_mdl/mol.py b/chython/files/mdl/mol.py similarity index 100% rename from chython/files/_mdl/mol.py rename to chython/files/mdl/mol.py diff --git a/chython/files/_mdl/read.py b/chython/files/mdl/read.py similarity index 100% rename from chython/files/_mdl/read.py rename to chython/files/mdl/read.py diff --git a/chython/files/_mdl/rxn.py b/chython/files/mdl/rxn.py similarity index 100% rename from chython/files/_mdl/rxn.py rename to chython/files/mdl/rxn.py diff --git a/chython/files/_mdl/stereo.py b/chython/files/mdl/stereo.py similarity index 100% rename from chython/files/_mdl/stereo.py rename to chython/files/mdl/stereo.py diff --git a/chython/files/_mdl/write.py 
b/chython/files/mdl/write.py similarity index 100% rename from chython/files/_mdl/write.py rename to chython/files/mdl/write.py From df0b08c3baaebe2399d4a2635eded5c9813d38d2 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 17:55:22 +0100 Subject: [PATCH 44/51] Fixed stereo parsing bug --- chython/files/daylight/parser.py | 6 +++++- chython/files/daylight/smiles.py | 6 ++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index f685a359..42d1583b 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -37,6 +37,7 @@ def parser(tokens, strong_cycle): last_num = 0 stack = [] cycles = {} + stereo_atoms = {} stereo_bonds = defaultdict(dict) previous = None @@ -135,6 +136,8 @@ def parser(tokens, strong_cycle): # else bt == 4 - skip dot previous = None + if 'stereo' in token: + stereo_atoms[atom_num] = token.pop('stereo') atoms.append(token) atoms_types.append(token_type) last_num = atom_num @@ -147,7 +150,8 @@ def parser(tokens, strong_cycle): elif previous: raise IncorrectSmiles('bond on the end') - return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log} + return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_atoms': stereo_atoms, + 'stereo_bonds': stereo_bonds, 'log': log} __all__ = ['parser'] diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 442195f8..60630ba0 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -170,9 +170,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo: return - - stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a.get('stereo')) is not None] - if not stereo_atoms and not data['stereo_bonds']: + elif not data['stereo_atoms'] or not data['stereo_bonds']: return atoms = molecule._atoms @@ -185,7 +183,7 @@ def postprocess_molecule(molecule, 
data, *, ignore_stereo=False): log = [] stereo = [] - for i, s in stereo_atoms: + for i, s in data['stereo_atoms'].items(): n = mapping[i] if not i and atoms[n].implicit_hydrogens: # first atom in smiles has reversed chiral mark s = not s From 3e1799e28ecace6e12f9771e96cad5a580f362e1 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 17:59:35 +0100 Subject: [PATCH 45/51] Reactors refactoring started --- chython/reactor/base.py | 204 +++++++++++++-------------------- chython/reactor/transformer.py | 17 ++- 2 files changed, 86 insertions(+), 135 deletions(-) diff --git a/chython/reactor/base.py b/chython/reactor/base.py index ca128cbf..acfe4cc0 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -17,12 +17,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from collections import defaultdict -from itertools import product from typing import Union from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement, QueryElement, AnyMetal +from ..periodictable import Element, AnyElement, QueryElement class BaseReactor: @@ -40,18 +38,6 @@ def __init__(self, pattern, replacement, delete_atoms, fix_rings, fix_tautomers) self._fix_rings = fix_rings self._fix_tautomers = fix_tautomers - def _patcher(self, structure: MoleculeContainer, mapping): - new = self._prepare_skeleton(structure, mapping) - self._fix_stereo(new, structure, mapping) - - if self._fix_rings: - new.kekule() # keeps stereo as is - if not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized - new.fix_stereo() - else: - new.fix_stereo() - yield new - def _get_deleted(self, structure, mapping): if not self._to_delete: return set() @@ -83,7 +69,7 @@ def _get_deleted(self, structure, mapping): to_delete.update(delete) return to_delete - def _prepare_skeleton(self, structure, mapping): 
+ def _patcher(self, structure: MoleculeContainer, mapping): atoms = structure._atoms bonds = structure._bonds @@ -95,156 +81,122 @@ def _prepare_skeleton(self, structure, mapping): stereo_atoms = [] stereo_bonds = [] - for n, a in self._replacement.atoms(): - if isinstance(a, AnyElement): - if n := mapping.get(n): + for n, ra in self._replacement.atoms(): + if isinstance(ra, AnyElement): + if m := mapping.get(n): # keep matched atom type and isotope - e = atoms[n].copy(stereo=True) - e.charge = a.charge - e.is_radical = a.is_radical - if a.stereo is not None: # override stereo - e._stereo = a.stereo - elif e.stereo is not None: # keep original stereo - stereo_atoms.append(n) # mark for stereo fix - natoms[n] = e - nbonds[n] = {} + sa = atoms[m] + a = sa.copy() + a.charge = ra.charge + a.is_radical = ra.is_radical + if ra.stereo is not None: # override stereo + a._stereo = ra.stereo + elif sa.stereo is not None: # keep original stereo + stereo_atoms.append(m) # mark for stereo fix else: raise ValueError("AnyElement doesn't match to pattern") else: # QueryElement or Element - a: Union[QueryElement, Element] # typehint - e = Element.from_atomic_number(a.atomic_number) - e = e(a.isotope, charge=a.charge, is_radical=a.is_radical, stereo=a.stereo) + ra: Union[QueryElement, Element] # typehint + e = Element.from_atomic_number(ra.atomic_number) + a = e(ra.isotope, charge=ra.charge, is_radical=ra.is_radical) if not (m := mapping.get(n)): # new atom m = max_atom + 1 max_atom += 1 mapping[n] = m - if isinstance(a, Element): - e._implicit_hydrogens = a.implicit_hydrogens # keep H count from patch - e.x = a.x # keep coordinates from patch - e.y = a.y - elif len(a.implicit_hydrogens) == 1: - e._implicit_hydrogens = a.implicit_hydrogens[0] - elif a.implicit_hydrogens: + a._stereo = ra.stereo # keep stereo from patch for new atoms + if isinstance(ra, Element): + a._implicit_hydrogens = ra.implicit_hydrogens # keep H count from patch + a.x = ra.x # keep coordinates from patch + 
a.y = ra.y + elif len(ra.implicit_hydrogens) == 1: # keep H count from patch + a._implicit_hydrogens = ra.implicit_hydrogens[0] + elif ra.implicit_hydrogens: raise ValueError('Query element in patch has more than one implicit hydrogen') else: # existing atoms - b = atoms[m] - e.x = b.x # preserve existing coordinates - e.y = b.y - if a.stereo is None and b.stereo is not None: # keep original stereo - e._stereo = b.stereo + sa = atoms[m] + a.x = sa.x # preserve existing coordinates + a.y = sa.y + if ra.stereo is not None: + a._stereo = ra.stereo + elif sa.stereo is not None: # keep original stereo stereo_atoms.append(m) - natoms[m] = e - nbonds[m] = {} + natoms[m] = a + nbonds[m] = {} # preserve connectivity order for n, bs in self._replacement._bonds.items(): n = mapping[n] - for m, b in bs.items(): + for m, rb in bs.items(): m = mapping[m] - if n in nbonds[m]: + if n in nbonds[m]: # back-link nbonds[n][m] = nbonds[m][n] else: - nbonds[n][m] = b = Bond(int(b), stereo=b.stereo) - if b.stereo is None: - if not (nb := bonds.get(n)): - continue - if not (mb := nb.get(m)): - continue - if mb.stereo is None: - continue - # original structure has stereo bond - b._stereo = mb.stereo + nbonds[n][m] = b = Bond(int(rb)) + if rb.stereo is not None: # override stereo + b._stereo = rb.stereo + elif (sbn := bonds.get(n)) is None or (sb := sbn.get(m)) is None or sb.stereo is None: + continue + else: # original structure has stereo bond stereo_bonds.append((n, m)) - patch_atoms = set(new) # don't move! 
+ patched_atoms = set(new) for n, a in atoms.items(): # add unmatched or masked atoms - if n not in patch_atoms and n not in to_delete: + if n not in patched_atoms and n not in to_delete: natoms[n] = a.copy(hydrogens=True, stereo=True) nbonds[n] = {} - for n, bs in bonds.items(): + for n, bs in bonds.items(): # preserve connectivity order for keeping stereo labels as is if n in to_delete: # atoms for removing continue for m, b in bs.items(): # ignore deleted atoms and patch atoms - if m in to_delete or n in patch_atoms and m in patch_atoms: + if m in to_delete or n in patched_atoms and m in patched_atoms: continue - elif n in nbonds[m]: + elif n in nbonds[m]: # back-link nbonds[n][m] = nbonds[m][n] + elif b.stereo is not None and (n in patched_atoms or m in patched_atoms): + # unmatched/masked atoms to patched atoms linker bonds + # stereo label should be recalculated + nbonds[n][m] = b.copy() + stereo_bonds.append((n, m)) else: nbonds[n][m] = b.copy(stereo=True) - if b.stereo is not None and (n in patch_atoms or m in patch_atoms): - stereo_bonds.append((n, m)) for n, a in new.atoms(): if a.implicit_hydrogens is None: new.calc_implicit(n) new.calc_labels() - return new - def _fix_stereo(self, new, structure, mapping): - products = self.__products - stereo_override = set() - r_mapping = {m: n for n, m in mapping.items()} - - # set patch atoms stereo - for n, s in products._atoms_stereo.items(): - m = mapping[n] - new._atoms_stereo[m] = products._translate_tetrahedron_sign(n, [r_mapping[x] for x in - new.stereogenic_tetrahedrons[m]], s) - stereo_override.add(m) - - for n, s in products._allenes_stereo.items(): - m = mapping[n] - t1, t2, *_ = new.stereogenic_allenes[m] - new._allenes_stereo[m] = products._translate_allene_sign(n, r_mapping[t1], r_mapping[t2], s) - stereo_override.add(m) - - for (n, m), s in products._cis_trans_stereo.items(): - nm = (mapping[n], mapping[m]) - try: - t1, t2, *_ = new.stereogenic_cis_trans[nm] - except KeyError: - nm = nm[::-1] - t2, 
t1, *_ = new.stereogenic_cis_trans[nm] - new._cis_trans_stereo[nm] = products._translate_cis_trans_sign(n, m, r_mapping[t1], r_mapping[t2], s) - stereo_override.update(nm) - - # set unmatched part stereo and not overridden by patch. - for n, s in structure._atoms_stereo.items(): - if n in stereo_override or n not in new.stereogenic_tetrahedrons or \ - new._bonds[n].keys() != structure._bonds[n].keys(): - # skip atoms with changed neighbors - continue - new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new.stereogenic_tetrahedrons[n], s) - - for n, s in structure._allenes_stereo.items(): - if n in stereo_override or n not in new.stereogenic_allenes or \ - set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): - # skip changed allenes - continue - t1, t2, *_ = new.stereogenic_allenes[n] - new._allenes_stereo[n] = structure._translate_allene_sign(n, t1, t2, s) + # translate stereo sign from old order to new order + for n in stereo_atoms: + if n in new.stereogenic_tetrahedrons: + if bonds[n].keys() != nbonds[n].keys(): + # flush stereo from reaction center. should be explicitly set in replacement. 
+ continue + s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) + natoms[n]._stereo = s + elif n in new.stereogenic_allenes: + if set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): + # flush stereo for changed allene substituents + continue + s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) + natoms[n]._stereo = s + # else: ignore label - for nm, s in structure._cis_trans_stereo.items(): - n, m = nm - if n in stereo_override or m in stereo_override: - continue - env = structure.stereogenic_cis_trans[nm] - try: - new_env = new.stereogenic_cis_trans[nm] - except KeyError: - nm = nm[::-1] - try: - new_env = new.stereogenic_cis_trans[nm] - except KeyError: + for n, m in stereo_bonds: + if (t12 := new._stereo_cis_trans_terminals.get(n, True)) == new._stereo_cis_trans_terminals.get(m, False): + if set(new.stereogenic_cis_trans[t12]) != set(structure.stereogenic_cis_trans[t12]): continue - t2, t1, *_ = new_env - else: - t1, t2, *_ = new_env - if set(env) != set(new_env): - continue - new._cis_trans_stereo[nm] = structure._translate_cis_trans_sign(n, m, t1, t2, s) + new._translate_cis_trans_sign(*t12, *structure.stereogenic_cis_trans[t12][:2], bonds[n][m].stereo) + # else: ignore label + + if self._fix_rings: + new.kekule() # keeps stereo as is + if not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized + new.fix_stereo() + else: + new.fix_stereo() + return new __all__ = ['BaseReactor'] diff --git a/chython/reactor/transformer.py b/chython/reactor/transformer.py index 5852cc47..1ca11099 100644 --- a/chython/reactor/transformer.py +++ b/chython/reactor/transformer.py @@ -43,21 +43,20 @@ def __init__(self, pattern: QueryContainer, replacement: Union[MoleculeContainer if not isinstance(pattern, QueryContainer) or not isinstance(replacement, (MoleculeContainer, QueryContainer)): raise TypeError('invalid params') - self.pattern = pattern 
- self.replacement = replacement - self.__automorphism_filter = automorphism_filter - self.__copy_metadata = copy_metadata + self._pattern = pattern + self._automorphism_filter = automorphism_filter + self._copy_metadata = copy_metadata super().__init__(pattern, replacement, delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, structure: MoleculeContainer): if not isinstance(structure, MoleculeContainer): raise TypeError('only Molecules possible') - for mapping in self.pattern.get_mapping(structure, automorphism_filter=self.__automorphism_filter): - for transformed in self._patcher(structure, mapping): - if self.__copy_metadata: - transformed.meta.update(structure.meta) - yield transformed + for mapping in self._pattern.get_mapping(structure, automorphism_filter=self._automorphism_filter): + transformed = self._patcher(structure, mapping) + if self._copy_metadata: + transformed.meta.update(structure.meta) + yield transformed __all__ = ['Transformer'] From 907ed2cac0dacb79f8c155b405b121e031358f8e Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Nov 2024 18:47:50 +0100 Subject: [PATCH 46/51] fixes --- chython/files/daylight/parser.py | 2 +- chython/files/daylight/smiles.py | 2 +- chython/reactor/base.py | 44 ++++++++++++++++++-------------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index 42d1583b..f45d020c 100644 --- a/chython/files/daylight/parser.py +++ b/chython/files/daylight/parser.py @@ -136,7 +136,7 @@ def parser(tokens, strong_cycle): # else bt == 4 - skip dot previous = None - if 'stereo' in token: + if token.get('stereo') is not None: stereo_atoms[atom_num] = token.pop('stereo') atoms.append(token) atoms_types.append(token_type) diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 60630ba0..293597ac 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -170,7 +170,7 @@ def 
postprocess_molecule(molecule, data, *, ignore_stereo=False): if ignore_stereo: return - elif not data['stereo_atoms'] or not data['stereo_bonds']: + elif not data['stereo_atoms'] and not data['stereo_bonds']: return atoms = molecule._atoms diff --git a/chython/reactor/base.py b/chython/reactor/base.py index acfe4cc0..ca39685a 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -81,6 +81,8 @@ def _patcher(self, structure: MoleculeContainer, mapping): stereo_atoms = [] stereo_bonds = [] + # let's preserve connectivity order from replacement to keep stereo signs as is. + # stereo labels from original structure will be recalculated after full molecule construction. for n, ra in self._replacement.atoms(): if isinstance(ra, AnyElement): if m := mapping.get(n): @@ -140,10 +142,17 @@ def _patcher(self, structure: MoleculeContainer, mapping): stereo_bonds.append((n, m)) patched_atoms = set(new) - for n, a in atoms.items(): # add unmatched or masked atoms + for n, sa in atoms.items(): # add unmatched or masked atoms if n not in patched_atoms and n not in to_delete: - natoms[n] = a.copy(hydrogens=True, stereo=True) + natoms[n] = a = sa.copy(hydrogens=True) nbonds[n] = {} + if sa.stereo is not None: + # in case of allenes label can disappear/change, thus, requires recalculation + # for tetrahedrons label can be stored as is + if len(bonds[n]) >= 3: + a._stereo = sa.stereo + else: + stereo_atoms.append(n) for n, bs in bonds.items(): # preserve connectivity order for keeping stereo labels as is if n in to_delete: # atoms for removing @@ -154,13 +163,11 @@ def _patcher(self, structure: MoleculeContainer, mapping): continue elif n in nbonds[m]: # back-link nbonds[n][m] = nbonds[m][n] - elif b.stereo is not None and (n in patched_atoms or m in patched_atoms): - # unmatched/masked atoms to patched atoms linker bonds - # stereo label should be recalculated - nbonds[n][m] = b.copy() - stereo_bonds.append((n, m)) else: - nbonds[n][m] = b.copy(stereo=True) + 
nbonds[n][m] = b.copy() + if b.stereo is not None: + # stereo label should be recalculated + stereo_bonds.append((n, m)) for n, a in new.atoms(): if a.implicit_hydrogens is None: @@ -170,24 +177,23 @@ def _patcher(self, structure: MoleculeContainer, mapping): # translate stereo sign from old order to new order for n in stereo_atoms: if n in new.stereogenic_tetrahedrons: - if bonds[n].keys() != nbonds[n].keys(): + if bonds[n].keys() == nbonds[n].keys(): # flush stereo from reaction center. should be explicitly set in replacement. - continue - s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) - natoms[n]._stereo = s + s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], atoms[n].stereo) + natoms[n]._stereo = s elif n in new.stereogenic_allenes: - if set(new.stereogenic_allenes[n]) != set(structure.stereogenic_allenes[n]): + if set(new.stereogenic_allenes[n]) == set(structure.stereogenic_allenes[n]): # flush stereo for changed allene substituents - continue - s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) - natoms[n]._stereo = s + s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], atoms[n].stereo) + natoms[n]._stereo = s # else: ignore label for n, m in stereo_bonds: if (t12 := new._stereo_cis_trans_terminals.get(n, True)) == new._stereo_cis_trans_terminals.get(m, False): - if set(new.stereogenic_cis_trans[t12]) != set(structure.stereogenic_cis_trans[t12]): - continue - new._translate_cis_trans_sign(*t12, *structure.stereogenic_cis_trans[t12][:2], bonds[n][m].stereo) + if set(new.stereogenic_cis_trans[t12]) == set(env := structure.stereogenic_cis_trans[t12]): + # connected to cumulenes atoms should be the same + s = new._translate_cis_trans_sign(*t12, *env[:2], bonds[n][m].stereo) + nbonds[n][m]._stereo = s # else: ignore label if self._fix_rings: From 8d3994eed0e186e88dfe4866a55cb2f7752843e4 Mon Sep 17 00:00:00 2001 From: Ramil 
Nugmanov Date: Wed, 11 Dec 2024 09:22:44 +0100 Subject: [PATCH 47/51] WIP: pach support fixes --- chython/__init__.py | 4 +- chython/algorithms/stereo.py | 4 ++ chython/containers/__init__.py | 7 ++- chython/containers/_pack.pyx | 107 +++++++++++++++++---------------- 4 files changed, 65 insertions(+), 57 deletions(-) diff --git a/chython/__init__.py b/chython/__init__.py index 0c860191..b695b7b2 100644 --- a/chython/__init__.py +++ b/chython/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2014-2019 Timur Madzhidov tmadzhidov@gmail.com features and API discussion # Copyright 2014-2019 Alexandre Varnek base idea of CGR approach # This file is part of chython. @@ -25,7 +25,7 @@ from .utils import * -pickle_cache = False # store cached attributes in pickle torch_device = 'cpu' # AAM model device. Change before first `reset_mapping` call! + __all__ = [] diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index fd19fa75..80f87049 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -630,6 +630,10 @@ def fix_stereo(self: 'MoleculeContainer'): old_stereo = fail_stereo self.flush_stereo_cache() + @cached_property + def _cis_trans_count(self) -> int: + return sum(b.stereo is not None for *_, b in self.bonds()) + @cached_property def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: """ diff --git a/chython/containers/__init__.py b/chython/containers/__init__.py index 6658eeaa..0f2f3dbb 100644 --- a/chython/containers/__init__.py +++ b/chython/containers/__init__.py @@ -36,7 +36,8 @@ def unpach(data: bytes, /, *, compressed=True) -> Union[MoleculeContainer, React return ReactionContainer.unpack(data, compressed=False) +unpack = unpach + + __all__ = [x for x in locals() if x.endswith('Container')] -__all__.append('Bond') -__all__.append('QueryBond') -__all__.append('unpach') +__all__.extend(['Bond', 'QueryBond', 'unpack', 
'unpach']) diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack.pyx index fa61afc0..fe024654 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -53,35 +53,26 @@ from libc.math cimport ldexp, frexp @cython.cdivision(True) @cython.wraparound(False) def pack(object molecule): - cdef bint b # binary flag + cdef bint b = True # binary flag cdef char charge - cdef unsigned char atomic_number, isotope, bond, s = 0, buffer_b, buffer_o - cdef unsigned char *p, *data + cdef unsigned char atomic_number, ngb_count, isotope, bond, s = 0, buffer_b, buffer_o, stereo, hcr + cdef unsigned char *data cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift # can be > 2^16 - cdef unsigned char[4096] stereo, hcr, seen - cdef unsigned int[4096] xy # 2 * 16bit + cdef unsigned char[4096] seen cdef bytes py_pack - cdef dict py_ngb, py_atoms, py_bonds, py_charges, py_radicals, py_hydrogens, py_plane - cdef dict py_cis_trans_stereo, py_atoms_stereo, py_allenes_stereo + cdef dict py_ngb, py_atoms, py_bonds cdef tuple py_tuple cdef object py_atom, py_bond, py_nan_int, py_obj # map molecule to vars py_atoms = molecule._atoms py_bonds = molecule._bonds - py_charges = molecule._charges - py_radicals = molecule._radicals - py_hydrogens = molecule._hydrogens - py_cis_trans_stereo = molecule._cis_trans_stereo - py_atoms_stereo = molecule._atoms_stereo - py_allenes_stereo = molecule._allenes_stereo - py_plane = molecule._plane # calculate elements count atoms_count = len(py_atoms) - cis_trans_count = len(py_cis_trans_stereo) + cis_trans_count = molecule._cis_trans_count for py_ngb in py_bonds.values(): bonds_count += len(py_ngb) @@ -103,64 +94,76 @@ 
def pack(object molecule): if not data: raise MemoryError() - # precalculate atom attrs - # should be done independently, due to possible randomness in dicts order. - # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical - for n, py_nan_int in py_hydrogens.items(): - if py_nan_int is None: - hcr[n] = 0xe0 # 0b11100000 - else: - hcr[n] = py_nan_int << 5 - for n, charge in py_charges.items(): - hcr[n] |= (charge + 4) << 1 - for n, b in py_radicals.items(): - if b: # lazy memory access - hcr[n] |= 1 - - # 2 float16 big endian - for n, py_tuple in py_plane.items(): - p = &xy[n] - double_to_float16(py_tuple[0], &p[0]) - double_to_float16(py_tuple[1], &p[2]) - - # erase random data - seen[n] = 0 - stereo[n] = 0 - - # 2 bit tetrahedron | 2 bit allene | 0000 - for n, b in py_atoms_stereo.items(): - stereo[n] = 0xc0 if b else 0x80 - for n, b in py_allenes_stereo.items(): - stereo[n] = 0x30 if b else 0x20 - # start pack collection data[0] = 2 # header. specification version 2 data[1] = atoms_count >> 4 # 5-12b of atom count value data[2] = atoms_count << 4 | cis_trans_count >> 8 # 1-4b of atom count value, 9-12b of cis-trans count value data[3] = cis_trans_count # 1-8b of cis-trans count value - b = True # init connection table flag for py_obj, py_atom in py_atoms.items(): py_ngb = py_bonds[py_obj] + ngb_count = len(py_ngb) n = py_obj # cast to C seen[n] = 1 - p = &xy[n] # XY atomic_number = py_atom.atomic_number - py_nan_int = py_atom._Core__isotope # direct access + + py_nan_int = py_atom._isotope # direct access if py_nan_int is None: isotope = 0 else: isotope = py_nan_int - common_isotopes[atomic_number] + py_nan_int = py_atom._stereo + if py_nan_int is None: + stereo = 0 + # V2 specification + # 2 bit tetrahedron | 2 bit allene | 0000 + elif py_nan_int: + if ngb_count == 2: + stereo = 0x30 + else: + stereo = 0xc0 + else: + if ngb_count == 2: + stereo = 0x20 + else: + stereo = 0x80 + + # precalculate atom attrs + # should be done independently, due to possible 
randomness in dicts order. + # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical + py_nan_int = py_atom._implicit_hydrogens + if py_nan_int is None: + hcr = 0xe0 # 0b11100000 + else: + hcr = py_nan_int << 5 + + charge = py_atom._charge + hcr |= (charge + 4) << 1 + if py_atom._is_radical: + hcr |= 1 + data[atoms_shift] = n >> 4 # 5-12b AN - data[atoms_shift + 1] = n << 4 | len(py_ngb) # 1-4b AN, 4b NC - data[atoms_shift + 2] = stereo[n] | isotope >> 1 # TS , AS , 4b I + data[atoms_shift + 1] = n << 4 | ngb_count # 1-4b AN, 4b NC + data[atoms_shift + 2] = stereo | isotope >> 1 # TS , AS , 4b I data[atoms_shift + 3] = isotope << 7 | atomic_number # 1bI , A + + # 2 float16 big endian + for n, py_tuple in py_plane.items(): + p = &xy[n] + double_to_float16(py_tuple[0], &p[0]) + double_to_float16(py_tuple[1], &p[2]) + + # erase random data + seen[n] = 0 + stereo[n] = 0 + data[atoms_shift + 4] = p[0] data[atoms_shift + 5] = p[1] data[atoms_shift + 6] = p[2] data[atoms_shift + 7] = p[3] - data[atoms_shift + 8] = hcr[n] + + data[atoms_shift + 8] = hcr atoms_shift += 9 # collect connection table From 8e6b9a78fd0d38f0d3e93a5d5bee55a7e2cd3e2d Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 23 Dec 2024 14:53:58 +0100 Subject: [PATCH 48/51] packing reimplemented unpacking WIP --- chython/algorithms/stereo.py | 2 +- chython/containers/_pack.pyx | 69 +++++++----- chython/containers/_unpack.pyx | 194 ++++++++++++++------------------- chython/containers/molecule.py | 33 +----- 4 files changed, 126 insertions(+), 172 deletions(-) diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py index 80f87049..6cd814a2 100644 --- a/chython/algorithms/stereo.py +++ b/chython/algorithms/stereo.py @@ -197,7 +197,7 @@ def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: terminals = [x for x, y in adj.items() if len(y) == 1] # list to keep atoms order! 
cumulenes = [] while terminals: - n = terminals.pop() + n = terminals.pop(0) m = adj[n].pop() path = [n, m] while m not in terminals: diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack.pyx index fe024654..37b42b34 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack.pyx @@ -20,7 +20,7 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp, frexp -# Format specification:: +# Format V2 specification:: # # Big endian bytes order # 8 bit - 0x02 (current format specification) @@ -48,6 +48,25 @@ from libc.math cimport ldexp, frexp # 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. # 1 bit - sign +# stereo block: +# 0000 - no stereo +# 0001 - not in use +# 0010 - allene +# 0011 - allene +# 0100 - not in use +# 0101 - not in use +# 0110 - not in use +# 0111 - not in use +# 1000 - tetrahedron +# 1001 - not in use +# 1010 - not in use +# 1011 - not in use +# 1100 - tetrahedron +# 1101 - not in use +# 1110 - not in use +# 1111 - not in use + + @cython.nonecheck(False) @cython.boundscheck(False) @cython.cdivision(True) @@ -57,18 +76,19 @@ def pack(object molecule): cdef char charge cdef unsigned char atomic_number, ngb_count, isotope, bond, s = 0, buffer_b, buffer_o, stereo, hcr cdef unsigned char *data - cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m + cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m, tn, tm cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift # can be > 2^16 - cdef unsigned char[4096] seen + cdef unsigned char[4096] seen # atom number is 12 bit, thus, can be any value up to 4095. 
numbers are not continuous cdef bytes py_pack - cdef dict py_ngb, py_atoms, py_bonds + cdef dict py_ngb, py_atoms, py_bonds, py_stereo cdef tuple py_tuple cdef object py_atom, py_bond, py_nan_int, py_obj # map molecule to vars py_atoms = molecule._atoms py_bonds = molecule._bonds + py_stereo = molecule._stereo_cis_trans_terminals # calculate elements count atoms_count = len(py_atoms) @@ -94,6 +114,8 @@ def pack(object molecule): if not data: raise MemoryError() + seen[:] = 0 # erase random data + # start pack collection data[0] = 2 # header. specification version 2 data[1] = atoms_count >> 4 # 5-12b of atom count value @@ -119,12 +141,12 @@ def pack(object molecule): # V2 specification # 2 bit tetrahedron | 2 bit allene | 0000 elif py_nan_int: - if ngb_count == 2: + if ngb_count == 2: # allene stereo = 0x30 else: stereo = 0xc0 else: - if ngb_count == 2: + if ngb_count == 2: # allene stereo = 0x20 else: stereo = 0x80 @@ -149,19 +171,8 @@ def pack(object molecule): data[atoms_shift + 3] = isotope << 7 | atomic_number # 1bI , A # 2 float16 big endian - for n, py_tuple in py_plane.items(): - p = &xy[n] - double_to_float16(py_tuple[0], &p[0]) - double_to_float16(py_tuple[1], &p[2]) - - # erase random data - seen[n] = 0 - stereo[n] = 0 - - data[atoms_shift + 4] = p[0] - data[atoms_shift + 5] = p[1] - data[atoms_shift + 6] = p[2] - data[atoms_shift + 7] = p[3] + double_to_float16(py_atom._x, &data[atoms_shift + 4]) + double_to_float16(py_atom._y, &data[atoms_shift + 6]) data[atoms_shift + 8] = hcr atoms_shift += 9 @@ -181,7 +192,7 @@ def pack(object molecule): b = True if not seen[m]: - bond = py_bond._Bond__order - 1 + bond = py_bond._order - 1 # 3 3 2 | 1 3 3 1 | 2 3 3 if s == 0: buffer_o = bond << 5 @@ -213,17 +224,19 @@ def pack(object molecule): order_shift += 1 s = 0 + py_nan_int = py_bond._stereo + if py_nan_int is not None: + py_tuple = py_stereo[py_obj] + tn, tm = py_tuple + data[cis_trans_shift] = tn >> 4 + data[cis_trans_shift + 1] = tn << 4 | tm >> 8 + 
data[cis_trans_shift + 2] = tm + data[cis_trans_shift + 3] = py_nan_int + cis_trans_shift += 4 + if s: # flush buffer data[order_shift] = buffer_o - for py_tuple, b in py_cis_trans_stereo.items(): - n, m = py_tuple - data[cis_trans_shift] = n >> 4 - data[cis_trans_shift + 1] = n << 4 | m >> 8 - data[cis_trans_shift + 2] = m - data[cis_trans_shift + 3] = b - cis_trans_shift += 4 - try: py_pack = data[:size] finally: diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack.pyx index 670f1f7b..aba7ca34 100644 --- a/chython/containers/_unpack.pyx +++ b/chython/containers/_unpack.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: language_level=3 # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,15 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp +from chython.containers import MoleculeContainer from chython.containers.bonds import Bond +from chython.periodictable import (H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, + Mn, Fe, Co, Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, + Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, + Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, + Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, Bh, Hs, + Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og) + # Format specification:: # @@ -57,20 +65,17 @@ from chython.containers.bonds import Bond @cython.wraparound(False) def unpack(const unsigned char[::1] data not None): cdef char *charges - cdef unsigned char a, b, c, d, isotope, atomic_number, neighbors_count, s = 0, nc, version - cdef unsigned char *atoms, *hydrogens, *neighbors, *orders, *is_tet, *is_all - cdef bint *stereo_sign, *ct_sign, *radicals + cdef unsigned char a, b, c, d, 
isotope, atomic_number, neighbors_count, s = 0, version, stereo, hydrogens + cdef unsigned char *neighbors, *orders + cdef bint *ct_sign cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, order_count cdef unsigned short i, j, k = 0, n, m, buffer_b, shift = 0 - cdef unsigned short *mapping, *isotopes, *cis_trans_1, *cis_trans_2, *connections + cdef unsigned short *mapping, *cis_trans_1, *cis_trans_2, *connections cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift - cdef double *x_coord, *y_coord cdef unsigned char[4096] seen - cdef object bond, py_n, py_m - cdef dict py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, py_ngb - cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo - cdef list py_mapping, py_atoms, py_isotopes, py_bonds_flat + cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool + cdef dict py_atoms, py_bonds, py_ngb # read header version = data[0] @@ -79,24 +84,16 @@ def unpack(const unsigned char[::1] data not None): cis_trans_count = (b & 0x0f) << 8 | c # allocate memory - charges = PyMem_Malloc(atoms_count * sizeof(char)) - radicals = PyMem_Malloc(atoms_count * sizeof(bint)) - atoms = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - hydrogens = PyMem_Malloc(atoms_count * sizeof(unsigned char)) neighbors = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - is_tet = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - is_all = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - stereo_sign = PyMem_Malloc(atoms_count * sizeof(bint)) mapping = PyMem_Malloc(atoms_count * sizeof(unsigned short)) - isotopes = PyMem_Malloc(atoms_count * sizeof(unsigned short)) - x_coord = PyMem_Malloc(atoms_count * sizeof(double)) - y_coord = PyMem_Malloc(atoms_count * sizeof(double)) - if not charges or not radicals or not atoms or not hydrogens or not neighbors or not is_tet or not is_all: - raise MemoryError() - if not stereo_sign or not mapping or not isotopes or not x_coord or not y_coord: 
+ if not neighbors or not mapping: raise MemoryError() + py_mol = MoleculeContainer() + py_atoms = py_mol._atoms + py_bonds = py_mol._bonds + # unpack atom block to separate attributes arrays for i in range(atoms_count): a, b = data[atoms_shift], data[atoms_shift + 1] @@ -106,34 +103,47 @@ def unpack(const unsigned char[::1] data not None): bonds_count += neighbors_count a, b = data[atoms_shift + 2], data[atoms_shift + 3] - if a >> 7: # tetrahedron bit set - is_tet[i] = 1 - is_all[i] = 0 - stereo_sign[i] = a & 0x40 # mask th bit - else: - is_tet[i] = 0 - if a >> 5: # allene bit set - is_all[i] = 1 - stereo_sign[i] = a & 0x10 # mask al bit - else: - is_all[i] = 0 - - atoms[i] = atomic_number = b & 0x7f + stereo = a >> 4 + if stereo == 0: + py_nan_bool = None + elif stereo == 0b0010: + py_nan_bool = False + elif stereo == 0b0011: + py_nan_bool = True + elif stereo == 0b1000: + py_nan_bool = False + else: # if stereo == 0b1100: + py_nan_bool = True + + atomic_number = b & 0x7f + py_atom = object.__new__(elements[atomic_number]) + py_atoms[n] = py_atom + + py_atom._stereo = py_nan_bool + isotope = (a & 0x0f) << 1 | b >> 7 if isotope: - isotopes[i] = common_isotopes[atomic_number] + isotope + py_atom._isotope = common_isotopes[atomic_number] + isotope else: - isotopes[i] = 0 + py_atom._isotope = None a, b = data[atoms_shift + 4], data[atoms_shift + 5] - x_coord[i] = double_from_bytes(a, b) + py_atom._x = double_from_bytes(a, b) a, b = data[atoms_shift + 6], data[atoms_shift + 7] - y_coord[i] = double_from_bytes(a, b) + py_atom._y = double_from_bytes(a, b) a = data[atoms_shift + 8] - hydrogens[i] = a >> 5 - charges[i] = ((a >> 1) & 0x0f) - 4 - radicals[i] = a & 0x01 + hydrogens = a >> 5 + if hydrogens == 7: + py_atom._hydrogens = None + else: + py_atom._hydrogens = hydrogens + + py_atom._charge = ((a >> 1) & 0x0f) - 4 + if a & 0x01: + py_atom._is_radical = True + else: + py_atom._is_radical = False atoms_shift += 9 # calculate bonds count and pack sections @@ -145,7 
+155,7 @@ def unpack(const unsigned char[::1] data not None): order_count = order_count / 8 + 1 else: order_count /= 8 - elif version == 0: + else: # if version == 0: order_count = bonds_count / 5 if bonds_count % 5: order_count += 1 @@ -193,7 +203,7 @@ def unpack(const unsigned char[::1] data not None): buffer_b = (a & 0x3) << 1 s = 1 i += 2 - elif version == 0: + else: # if version == 0: for j in range(order_shift, cis_trans_shift, 2): # 0 3 3 1 | 2 3 3 a, b = data[j], data[j + 1] @@ -219,77 +229,31 @@ def unpack(const unsigned char[::1] data not None): ct_sign[i] = d # d = 0x01 or 0x00 cis_trans_shift += 4 - # define returned data - py_mapping = [] - py_atoms = [] - py_isotopes = [] - py_charges = {} - py_radicals = {} - py_hydrogens = {} - py_plane = {} - py_atoms_stereo = {} - py_allenes_stereo = {} - py_cis_trans_stereo = {} - py_bonds = {} - py_bonds_flat = [] + for i in range(atoms_count): + n = mapping[i] + py_n = n # shared py int obj - for i in range(atoms_count): - n = mapping[i] - py_n = n # shared py int obj - - # fill intermediate data - py_mapping.append(py_n) - py_atoms.append(atoms[i]) - py_isotopes.append(isotopes[i] or None) - - py_charges[py_n] = charges[i] - py_radicals[py_n] = radicals[i] - if hydrogens[i] == 7: - py_hydrogens[py_n] = None - else: - py_hydrogens[py_n] = hydrogens[i] - - py_plane[py_n] = (x_coord[i], y_coord[i]) - - if is_tet[i]: - py_atoms_stereo[py_n] = stereo_sign[i] - elif is_all[i]: - py_allenes_stereo[py_n] = stereo_sign[i] - - py_bonds[py_n] = py_ngb = {} - seen[n] = 1 - - nc = neighbors[i] - for j in range(shift, shift + nc): - m = connections[j] - py_m = m - if seen[m]: # bond partially exists. need back-connection. 
- py_ngb[py_m] = py_bonds[py_m][py_n] - else: - bond = object.__new__(Bond) - bond._Bond__order = orders[k] + 1 - bond._Bond__n = py_n - bond._Bond__m = py_m - py_ngb[py_m] = bond - py_bonds_flat.append(bond) - k += 1 - shift += nc + py_bonds[py_n] = py_ngb = {} + seen[n] = 1 + + neighbors_count = neighbors[i] + for j in range(shift, shift + neighbors_count): + m = connections[j] + py_m = m + if seen[m]: # bond partially exists. need back-connection. + py_ngb[py_m] = py_bonds[py_m][py_n] + else: + bond = object.__new__(Bond) + bond._order = orders[k] + 1 + py_ngb[py_m] = bond + k += 1 + shift += neighbors_count for i in range(cis_trans_count): py_cis_trans_stereo[(cis_trans_1[i], cis_trans_2[i])] = ct_sign[i] - PyMem_Free(charges) - PyMem_Free(radicals) - PyMem_Free(atoms) - PyMem_Free(hydrogens) PyMem_Free(neighbors) - PyMem_Free(is_tet) - PyMem_Free(is_all) - PyMem_Free(stereo_sign) PyMem_Free(mapping) - PyMem_Free(isotopes) - PyMem_Free(x_coord) - PyMem_Free(y_coord) if bonds_count: PyMem_Free(connections) PyMem_Free(orders) @@ -297,9 +261,7 @@ def unpack(const unsigned char[::1] data not None): PyMem_Free(cis_trans_1) PyMem_Free(cis_trans_2) PyMem_Free(ct_sign) - return (py_mapping, py_atoms, py_isotopes, - py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, - py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo, size, py_bonds_flat) + return py_mol, size cdef short[119] common_isotopes @@ -312,6 +274,14 @@ common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15 254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278] +cdef object[119] elements +elements[:] = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, + Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, + Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, + Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, 
Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, + Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] + + cdef double double_from_bytes(unsigned char a, unsigned char b): cdef bint sign cdef int e diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 984f845c..16cabc46 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -555,36 +555,12 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, if compressed: data = decompress(data) if data[0] in (0, 2): - (mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, bonds, - atoms_stereo, allenes_stereo, cis_trans_stereo, pack_length, bonds_flat) = unpack(data) + mol, pack_length = unpack(data) elif data[0] == 3: - (mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, bonds, - atoms_stereo, allenes_stereo, cis_trans_stereo, pack_length, bonds_flat) = cpack(data) + mol, pack_length = cpack(data) else: raise ValueError('invalid pack header') - mol = object.__new__(cls) - mol._bonds = bonds - mol._plane = plane - mol._charges = charges - mol._radicals = radicals - mol._hydrogens = hydrogens - mol._atoms_stereo = atoms_stereo - mol._allenes_stereo = allenes_stereo - mol._cis_trans_stereo = cis_trans_stereo - - mol._MoleculeContainer__meta = None - mol._MoleculeContainer__name = None - mol._atoms = atoms = {} - - for n, a, i in zip(mapping, atom_numbers, isotopes): - atoms[n] = a = object.__new__(Element.from_atomic_number(a)) - a._Core__isotope = i - a._graph = ref(mol) - a._n = n - for b in bonds_flat: - b._Bond__graph = ref(mol) - if _return_pack_length: return mol, pack_length return mol @@ -610,11 +586,6 @@ def _cpack(self, order=None, check=True): atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo allenes_terminals = self._stereo_allenes_terminals cumulenes = {} From 
4ab39841563c5832fc91b61ce8f772665f49bd26 Mon Sep 17 00:00:00 2001 From: stsouko Date: Mon, 23 Dec 2024 18:11:08 +0100 Subject: [PATCH 49/51] unpacking reimplemented --- chython/containers/_pack.pyx | 3 +- chython/containers/_unpack.pyx | 70 +++++++++++++++------------------- chython/containers/molecule.py | 11 ++++-- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack.pyx index 37b42b34..30ccd1bc 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack.pyx @@ -19,6 +19,7 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp, frexp +from libc.string cimport memset # Format V2 specification:: # @@ -114,7 +115,7 @@ def pack(object molecule): if not data: raise MemoryError() - seen[:] = 0 # erase random data + memset(seen, 0, 4096 * sizeof(unsigned char)) # erase random data # start pack collection data[0] = 2 # header. specification version 2 diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack.pyx index aba7ca34..80ab6c59 100644 --- a/chython/containers/_unpack.pyx +++ b/chython/containers/_unpack.pyx @@ -64,18 +64,17 @@ from chython.periodictable import (H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, @cython.cdivision(True) @cython.wraparound(False) def unpack(const unsigned char[::1] data not None): - cdef char *charges cdef unsigned char a, b, c, d, isotope, atomic_number, neighbors_count, s = 0, version, stereo, hydrogens cdef unsigned char *neighbors, *orders - cdef bint *ct_sign cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, order_count cdef unsigned short i, j, k = 0, n, m, buffer_b, shift = 0 - cdef unsigned short *mapping, *cis_trans_1, *cis_trans_2, *connections + cdef unsigned short *mapping, *connections cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift cdef unsigned char[4096] seen cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool cdef dict 
py_atoms, py_bonds, py_ngb + cdef list py_cis_trans # read header version = data[0] @@ -93,6 +92,7 @@ def unpack(const unsigned char[::1] data not None): py_mol = MoleculeContainer() py_atoms = py_mol._atoms py_bonds = py_mol._bonds + py_cis_trans = [] # unpack atom block to separate attributes arrays for i in range(atoms_count): @@ -135,9 +135,9 @@ def unpack(const unsigned char[::1] data not None): a = data[atoms_shift + 8] hydrogens = a >> 5 if hydrogens == 7: - py_atom._hydrogens = None + py_atom._implicit_hydrogens = None else: - py_atom._hydrogens = hydrogens + py_atom._implicit_hydrogens = hydrogens py_atom._charge = ((a >> 1) & 0x0f) - 4 if a & 0x01: @@ -214,21 +214,6 @@ def unpack(const unsigned char[::1] data not None): orders[i + 4] = b & 0x7 i += 5 - if cis_trans_count: - cis_trans_1 = PyMem_Malloc(cis_trans_count * sizeof(unsigned short)) - cis_trans_2 = PyMem_Malloc(cis_trans_count * sizeof(unsigned short)) - ct_sign = PyMem_Malloc(cis_trans_count * sizeof(bint)) - if not cis_trans_1 or not cis_trans_2 or not ct_sign: - raise MemoryError() - - for i in range(cis_trans_count): - a, b = data[cis_trans_shift], data[cis_trans_shift + 1] - c, d = data[cis_trans_shift + 2], data[cis_trans_shift + 3] - cis_trans_1[i] = a << 4 | b >> 4 - cis_trans_2[i] = (b & 0x0f) << 8 | c - ct_sign[i] = d # d = 0x01 or 0x00 - cis_trans_shift += 4 - for i in range(atoms_count): n = mapping[i] py_n = n # shared py int obj @@ -243,25 +228,31 @@ def unpack(const unsigned char[::1] data not None): if seen[m]: # bond partially exists. need back-connection. 
py_ngb[py_m] = py_bonds[py_m][py_n] else: - bond = object.__new__(Bond) - bond._order = orders[k] + 1 - py_ngb[py_m] = bond + py_bond = object.__new__(Bond) + py_bond._order = orders[k] + 1 + py_bond._stereo = None + py_ngb[py_m] = py_bond k += 1 shift += neighbors_count - for i in range(cis_trans_count): - py_cis_trans_stereo[(cis_trans_1[i], cis_trans_2[i])] = ct_sign[i] + PyMem_Free(orders) + PyMem_Free(connections) + + if cis_trans_count: + for i in range(cis_trans_count): + a, b = data[cis_trans_shift], data[cis_trans_shift + 1] + c, d = data[cis_trans_shift + 2], data[cis_trans_shift + 3] + py_n = a << 4 | b >> 4 + py_m = (b & 0x0f) << 8 | c + if d: + py_cis_trans.append((py_n, py_m, True)) + else: + py_cis_trans.append((py_n, py_m, False)) + cis_trans_shift += 4 PyMem_Free(neighbors) PyMem_Free(mapping) - if bonds_count: - PyMem_Free(connections) - PyMem_Free(orders) - if cis_trans_count: - PyMem_Free(cis_trans_1) - PyMem_Free(cis_trans_2) - PyMem_Free(ct_sign) - return py_mol, size + return py_mol, py_cis_trans, size cdef short[119] common_isotopes @@ -273,13 +264,12 @@ common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15 222, 221, 228, 227, 231, 231, 235, 236, 241, 242, 243, 244, 245, 254, 253, 254, 254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278] - -cdef object[119] elements -elements[:] = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, - Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, - Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, - Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, - Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] +cdef list elements +elements = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, + Ni, Cu, Zn, Ga, Ge, As, Se, Br, 
Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, + Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, + Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, + Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] cdef double double_from_bytes(unsigned char a, unsigned char b): diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 16cabc46..1f607829 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -542,7 +542,7 @@ def pack_len(cls, data: bytes, /, *, compressed=True) -> int: return int.from_bytes(data[1:3], 'big') >> 4 @classmethod - def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, + def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labels_calculation=False, _return_pack_length=False) -> 'MoleculeContainer': """ Unpack from compressed bytes. @@ -555,12 +555,17 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, if compressed: data = decompress(data) if data[0] in (0, 2): - mol, pack_length = unpack(data) + mol, cis_trans, pack_length = unpack(data) + for n, m, s in cis_trans: + mol.bond(*mol._stereo_cis_trans_centers[n])._stereo = s elif data[0] == 3: - mol, pack_length = cpack(data) + mol, cis_trans, pack_length = cpack(data) else: raise ValueError('invalid pack header') + if not skip_labels_calculation: + mol.calc_labels() + if _return_pack_length: return mol, pack_length return mol From 24ce4ece6523d381e10f3eb22e84589163050431 Mon Sep 17 00:00:00 2001 From: stsouko Date: Tue, 24 Dec 2024 13:41:41 +0100 Subject: [PATCH 50/51] modules structure refactored --- build.py | 14 +++++++------- chython/containers/{_pack.pyx => _pack_v2.pyx} | 18 ------------------ .../{_unpack.pyx => _unpack_v0v2.pyx} | 0 .../containers/{_cpack.pyx => _unpack_v3.pyx} | 0 chython/containers/molecule.py | 16 ++++++++-------- 5 files 
changed, 15 insertions(+), 33 deletions(-) rename chython/containers/{_pack.pyx => _pack_v2.pyx} (96%) rename chython/containers/{_unpack.pyx => _unpack_v0v2.pyx} (100%) rename chython/containers/{_cpack.pyx => _unpack_v3.pyx} (100%) diff --git a/build.py b/build.py index f43339df..7f484611 100644 --- a/build.py +++ b/build.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023, 2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -48,14 +48,14 @@ Extension('chython.algorithms._isomorphism', ['chython/algorithms/_isomorphism.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._pack', - ['chython/containers/_pack.pyx'], + Extension('chython.containers._pack_v2', + ['chython/containers/_pack_v2.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._unpack', - ['chython/containers/_unpack.pyx'], + Extension('chython.containers._unpack_v0v2', + ['chython/containers/_unpack_v0v2.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._cpack', - ['chython/containers/_cpack.pyx'], + Extension('chython.containers._unpack_v3', + ['chython/containers/_unpack_v3.pyx'], extra_compile_args=extra_compile_args), Extension('chython.files._xyz', ['chython/files/_xyz.pyx'], diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack_v2.pyx similarity index 96% rename from chython/containers/_pack.pyx rename to chython/containers/_pack_v2.pyx index 30ccd1bc..6e2a8b19 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack_v2.pyx @@ -49,24 +49,6 @@ from libc.string cimport memset # 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. 
# 1 bit - sign -# stereo block: -# 0000 - no stereo -# 0001 - not in use -# 0010 - allene -# 0011 - allene -# 0100 - not in use -# 0101 - not in use -# 0110 - not in use -# 0111 - not in use -# 1000 - tetrahedron -# 1001 - not in use -# 1010 - not in use -# 1011 - not in use -# 1100 - tetrahedron -# 1101 - not in use -# 1110 - not in use -# 1111 - not in use - @cython.nonecheck(False) @cython.boundscheck(False) diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack_v0v2.pyx similarity index 100% rename from chython/containers/_unpack.pyx rename to chython/containers/_unpack_v0v2.pyx diff --git a/chython/containers/_cpack.pyx b/chython/containers/_unpack_v3.pyx similarity index 100% rename from chython/containers/_cpack.pyx rename to chython/containers/_unpack_v3.pyx diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 1f607829..ba8234a6 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -506,7 +506,7 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non :param version: format version :param order: atom order in V3 """ - from ._pack import pack + from ._pack_v2 import pack as pack_v2 if check: bonds = self._bonds @@ -518,9 +518,9 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non raise ValueError('To many neighbors not supported') if version == 2: - data = pack(self) + data = pack_v2(self) elif version == 3: - data = self._cpack(order, check) + data = self._pack_v3(order, check) else: raise ValueError('invalid specification version') if compressed: @@ -549,17 +549,17 @@ def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labe :param compressed: decompress data before processing. 
""" - from ._unpack import unpack - from ._cpack import unpack as cpack + from ._unpack_v0v2 import unpack as unpack_v0v2 + from ._unpack_v3 import unpack as unpack_v3 if compressed: data = decompress(data) if data[0] in (0, 2): - mol, cis_trans, pack_length = unpack(data) + mol, cis_trans, pack_length = unpack_v0v2(data) for n, m, s in cis_trans: mol.bond(*mol._stereo_cis_trans_centers[n])._stereo = s elif data[0] == 3: - mol, cis_trans, pack_length = cpack(data) + mol, cis_trans, pack_length = unpack_v3(data) else: raise ValueError('invalid pack header') @@ -580,7 +580,7 @@ def unpach(cls, data: Union[bytes, memoryview], /, *, compressed=True) -> 'Molec def __bytes__(self): return self.pack() - def _cpack(self, order=None, check=True): + def _pack_v3(self, order=None, check=True): if order is None: order = list(self._atoms) elif check: From eef995fb29663a91f2d025d524811a4e17d055ce Mon Sep 17 00:00:00 2001 From: stsouko Date: Tue, 24 Dec 2024 13:44:25 +0100 Subject: [PATCH 51/51] WIP prototyping phase --- chython/containers/_pack_v4.pyx | 305 ++++++++++++++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 chython/containers/_pack_v4.pyx diff --git a/chython/containers/_pack_v4.pyx b/chython/containers/_pack_v4.pyx new file mode 100644 index 00000000..559bc3db --- /dev/null +++ b/chython/containers/_pack_v4.pyx @@ -0,0 +1,305 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2024 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
cimport cython
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.math cimport ldexp, frexp
from libc.string cimport memset

# Format V4 specification::
# Based on V2 with changes in stereochemistry encoding. Compatible with V2 if no stereo information is encoded.
# Half of the 3rd byte of the atom block is changed to support extended stereo.
#
# Big-endian byte order
# 8 bit - 0x04 (current format specification)
# 12 bit - number of atoms
# 12 bit - extended bond stereo block size
#
# Atom block 9 bytes (repeated):
# 12 bit - atom number
# 4 bit - number of neighbors
# 4 bit - stereo block. details below.
# 5 bit - isotope (00000 - not specified, otherwise = isotope - common_isotope + 16)
# 7 bit - atomic number (<=118)
# 32 bit - XY float16 coordinates
# 3 bit - hydrogens (0-7). Note: 7 == None
# 4 bit - charge (charge + 4. possible range -4 - 4)
# 1 bit - radical state
#
# Connection table: flattened list of neighbors. The number of neighbors is stored in the atom block.
# For example CC(=O)O - {1: [2], 2: [1, 3, 4], 3: [2], 4: [2]} >> [2, 1, 3, 4, 2, 2].
# Repeated block (equal to bonds count).
# 24 bit - paired 12 bit numbers.
#
# Bond order block: 3 bits per bond, zero-padded to a full byte at the end.
# 3 3 2 | 1 3 3 1 | 2 3 3
# 000 - single
# 001 - double
# 010 - triple
# 011 - aromatic
# 111 - special bond (e.g. coordinate)
#
# Atom stereo block:
# ANDx and ORx encode only sign. X value stored in the same order in Stereo group block.
# 0000 [same as V2] - no stereo or unknown
# 0001 - not used
# 0010 - absolute sign False
# 0011 - absolute sign True
# 0100 - sign False OR1 group
# 0101 - sign True OR1 group
# 0110 - sign False AND1 group
# 0111 - sign True AND1 group
# 1000 - sign False OR2 group
# 1001 - sign True OR2 group
# 1010 - sign False AND2 group
# 1011 - sign True AND2 group
# 1100 - sign False ORx group
# 1101 - sign True ORx group
# 1110 - sign False ANDx group
# 1111 - sign True ANDx group
#
# Stereo group block:
# 16 bit group number. hypothetically the groups count could be equal to the sum of bonds and atoms counts
# first stored groups for atoms in the same order as defined in atom stereo.
# later stored bonds stereo groups.
# allowed values = [3, 2^16).
#
# Cis/trans data block (repeated):
# 24 bit - atoms pair
# 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropisomers.
# 1 bit - sign


@cython.nonecheck(False)
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def pack(object molecule):
    """
    Serialize a molecule into the compact binary pack layout.

    Reads ``molecule._atoms`` (atom number -> atom), ``molecule._bonds`` (atom number -> {neighbor -> bond})
    and ``molecule._cis_trans_count``. Per atom it reads ``atomic_number``, ``_isotope``, ``_stereo``,
    ``_implicit_hydrogens``, ``_charge``, ``_is_radical``, ``_x`` and ``_y``; per bond ``_order`` and ``_stereo``.

    NOTE(review): work in progress. The specification comment of this module describes format V4,
    but this body still writes header byte 2 and the V2 atom stereo layout. The stereo group block is not written.

    :param molecule: molecule container to pack.
    :return: packed molecule as bytes.
    :raises MemoryError: if the output buffer cannot be allocated.
    """
    cdef bint b = True  # binary flag: True - next neighbor starts a fresh 3-byte pair, False - it completes one
    cdef char charge
    cdef unsigned char atomic_number, ngb_count, isotope, bond, s = 0, buffer_b, buffer_o, stereo, hcr
    cdef unsigned char *data
    cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m, tn, tm
    cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift  # can be > 2^16
    # atom number is 12 bit, thus, can be any value up to 4095. numbers are not continuous
    cdef unsigned char[4096] seen

    cdef bytes py_pack
    cdef dict py_ngb, py_atoms, py_bonds
    cdef tuple py_tuple
    cdef object py_atom, py_bond, py_nan_int, py_obj

    # map molecule to vars
    py_atoms = molecule._atoms
    py_bonds = molecule._bonds

    # calculate elements count
    atoms_count = len(py_atoms)
    cis_trans_count = molecule._cis_trans_count

    for py_ngb in py_bonds.values():
        bonds_count += len(py_ngb)
    bonds_count /= 2  # graph is bidirected

    # calculate pack blocks entries
    size = bonds_count * 3  # bonds bits
    if size % 8:  # partial byte fill
        size = size / 8 + 1
    else:
        size /= 8
    bonds_shift = 4 + 9 * atoms_count  # connection table starting byte
    order_shift = bonds_shift + 3 * bonds_count  # bond orders block starting byte
    cis_trans_shift = size + order_shift  # cis-trans block starting byte
    size = cis_trans_shift + 4 * cis_trans_count  # total pack size

    # allocate pack in memory. PyMem_Malloc returns void*, an explicit cast is required.
    data = <unsigned char *> PyMem_Malloc(size * sizeof(unsigned char))
    if not data:
        raise MemoryError()

    # everything that touches the buffer lives inside try/finally,
    # so any exception raised while reading Python-level attributes can't leak the allocation.
    try:
        memset(seen, 0, 4096 * sizeof(unsigned char))  # erase random data

        # start pack collection
        # NOTE(review): the specification comment of this module declares 0x04 for this format.
        data[0] = 2  # header. specification version 2
        data[1] = atoms_count >> 4  # 5-12b of atom count value
        data[2] = atoms_count << 4 | cis_trans_count >> 8  # 1-4b of atom count value, 9-12b of cis-trans count value
        data[3] = cis_trans_count  # 1-8b of cis-trans count value

        for py_obj, py_atom in py_atoms.items():
            py_ngb = py_bonds[py_obj]
            ngb_count = len(py_ngb)
            n = py_obj  # cast to C
            seen[n] = 1
            atomic_number = py_atom.atomic_number

            py_nan_int = py_atom._isotope  # direct access
            if py_nan_int is None:
                isotope = 0
            else:
                isotope = py_nan_int - common_isotopes[atomic_number]

            py_nan_int = py_atom._stereo
            if py_nan_int is None:
                stereo = 0
            # V2 specification
            # 2 bit tetrahedron | 2 bit allene | 0000
            elif py_nan_int:
                if ngb_count == 2:  # allene
                    stereo = 0x30
                else:
                    stereo = 0xc0
            else:
                if ngb_count == 2:  # allene
                    stereo = 0x20
                else:
                    stereo = 0x80

            # precalculate atom attrs
            # should be done independently, due to possible randomness in dicts order.
            # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical
            py_nan_int = py_atom._implicit_hydrogens
            if py_nan_int is None:
                hcr = 0xe0  # 0b11100000
            else:
                hcr = py_nan_int << 5

            charge = py_atom._charge
            hcr |= (charge + 4) << 1
            if py_atom._is_radical:
                hcr |= 1

            data[atoms_shift] = n >> 4  # 5-12b AN
            data[atoms_shift + 1] = n << 4 | ngb_count  # 1-4b AN, 4b NC
            data[atoms_shift + 2] = stereo | isotope >> 1  # TS , AS , 4b I
            data[atoms_shift + 3] = isotope << 7 | atomic_number  # 1bI , A

            # 2 float16 big endian
            double_to_float16(py_atom._x, &data[atoms_shift + 4])
            double_to_float16(py_atom._y, &data[atoms_shift + 6])

            data[atoms_shift + 8] = hcr
            atoms_shift += 9

            # collect connection table
            for m, py_bond in py_ngb.items():
                if b:  # 8 + 4
                    data[bonds_shift] = m >> 4
                    bonds_shift += 1
                    buffer_b = m << 4
                    b = False  # switch
                else:  # 4 + 8
                    data[bonds_shift] = buffer_b | m >> 8
                    bonds_shift += 1
                    data[bonds_shift] = m
                    bonds_shift += 1  # next free 3 bytes block
                    b = True

                # every bond is visited from both ends. store its order and stereo only once:
                # from the endpoint which is iterated first.
                if not seen[m]:
                    bond = py_bond._order - 1
                    # 3 3 2 | 1 3 3 1 | 2 3 3
                    # s is the position of the bond inside the repeating 8 bonds / 3 bytes pattern.
                    if s == 0:
                        buffer_o = bond << 5
                        s = 1
                    elif s == 1:
                        buffer_o |= bond << 2
                        s = 2
                    elif s == 2:
                        data[order_shift] = buffer_o | bond >> 1
                        order_shift += 1
                        buffer_o = bond << 7
                        s = 3
                    elif s == 3:
                        buffer_o |= bond << 4
                        s = 4
                    elif s == 4:
                        buffer_o |= bond << 1
                        s = 5
                    elif s == 5:
                        data[order_shift] = buffer_o | bond >> 2
                        order_shift += 1
                        buffer_o = bond << 6
                        s = 6
                    elif s == 6:
                        buffer_o |= bond << 3
                        s = 7
                    else:  # 7
                        data[order_shift] = buffer_o | bond
                        order_shift += 1
                        s = 0

                    py_nan_int = py_bond._stereo
                    if py_nan_int is not None:
                        # NOTE(review): `py_stereo` is neither declared nor assigned in this function.
                        # It is indexed by the current atom number and has to yield the (tn, tm) atoms pair
                        # stored for this bond. It must be bound before this branch can work.
                        py_tuple = py_stereo[py_obj]
                        tn, tm = py_tuple
                        data[cis_trans_shift] = tn >> 4
                        data[cis_trans_shift + 1] = tn << 4 | tm >> 8
                        data[cis_trans_shift + 2] = tm
                        data[cis_trans_shift + 3] = py_nan_int
                        cis_trans_shift += 4

        if s:  # flush buffer
            data[order_shift] = buffer_o

        py_pack = data[:size]
    finally:
        PyMem_Free(data)
    return py_pack


# Indexed by atomic number (0-118). `pack` stores `isotope - common_isotopes[atomic_number]` directly.
# Together with the format rule "isotope - common_isotope + 16" this implies that
# every entry is presumably the common isotope minus 16 (e.g. index 6: -4 == 12 - 16).
cdef short[119] common_isotopes
common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 24, 23, 24, 29,
                      32, 35, 36, 39, 40, 43, 43, 48, 49, 54, 57, 59, 63, 64, 68, 69, 72, 73, 75, 77,
                      80, 82, 85, 87, 90, 92, 96, 99, 103, 106, 112, 111, 115, 117, 121, 123, 124, 125,
                      128, 129, 134, 136, 141, 143, 147, 149, 151, 153, 157, 159, 162, 165, 168, 170,
                      174, 176, 179, 181, 185, 188, 191, 193, 193, 194, 206, 207, 210, 211, 216, 215,
                      222, 221, 228, 227, 231, 231, 235, 236, 241, 242, 243, 244, 245, 254, 253, 254,
                      254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278]


cdef void double_to_float16(double x, unsigned char* p):
    """
    Write `x` as a 2-byte big-endian half-precision float into p[0], p[1].

    The mantissa is truncated, not rounded. Zero is written when `x` is zero, when frexp returns
    a mantissa outside [0.5, 1), or when the exponent is outside the range handled below.
    """
    # adopted from cpython source code
    cdef unsigned char sign
    cdef int e
    cdef double f
    cdef unsigned short bits

    if x == 0.:
        p[0] = p[1] = 0
        return

    sign = x < 0.
    if sign:
        x = -x
    f = frexp(x, &e)
    e -= 1
    if f < .5 or f >= 1. or e >= 16 or e < -25:
        p[0] = p[1] = 0
        return  # ignore big values

    f *= 2.0  # normalize mantissa to [1, 2)
    if e < -14:  # gradual underflow: zero exponent field, leading 1 is kept in the mantissa
        f = ldexp(f, 14 + e)
        e = 0
    else:
        e += 15  # exponent bias
        f -= 1.  # get rid of the implicit leading 1

    f *= 1024.  # 2 ** 10: scale the fraction to the 10 bit mantissa
    # bitwise operations are not defined for double. truncate to integer first.
    bits = (<unsigned short> f) | (e << 10) | (sign << 15)
    p[0] = bits >> 8
    p[1] = bits