diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml deleted file mode 100644 index 14ec8ebf..00000000 --- a/.github/workflows/tests.yml +++ /dev/null @@ -1,161 +0,0 @@ -name: Tests - -on: - push: - branches: [ main, master, tests_update ] - pull_request: - branches: [ main, master, tests_update ] - -jobs: - test: - strategy: - fail-fast: false - matrix: - include: - # Linux builds (required) - - os: ubuntu-latest - python-version: "3.8" - extra_deps: "importlib_resources" - experimental: false - - os: ubuntu-latest - python-version: "3.9" - extra_deps: "" - experimental: false - - os: ubuntu-latest - python-version: "3.10" - extra_deps: "" - experimental: false - - os: ubuntu-latest - python-version: "3.11" - extra_deps: "" - experimental: false - # Windows builds (allowed to fail) - - os: windows-latest - python-version: "3.8" - extra_deps: "importlib_resources" - experimental: true - - os: windows-latest - python-version: "3.9" - extra_deps: "" - experimental: true - - os: windows-latest - python-version: "3.10" - extra_deps: "" - experimental: true - - os: windows-latest - python-version: "3.11" - extra_deps: "" - experimental: true - - runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.experimental }} - - steps: - - uses: actions/checkout@v3 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: 'pip' # Cache pip dependencies - - - name: Install system dependencies (Linux) - if: runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install -y build-essential gcc g++ gfortran - gcc --version # Print version for debugging - - - name: Install system dependencies (Windows) - if: runner.os == 'Windows' - run: | - choco install visualstudio2019buildtools --package-parameters "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64" - - - name: Install Cython and core dependencies - run: | - python -m pip install --upgrade pip - pip 
install "cython>=3.0.5" setuptools wheel - if [ "${{ matrix.extra_deps }}" != "" ]; then - pip install ${{ matrix.extra_deps }} - fi - shell: bash - - - name: Install Poetry - uses: snok/install-poetry@v1 - with: - version: 1.7.1 - virtualenvs-create: true - virtualenvs-in-project: true - - - name: Configure Poetry (Windows) - if: runner.os == 'Windows' - run: | - $env:Path += ";$env:APPDATA\Python\Scripts" - poetry config installer.max-workers 4 - shell: pwsh - - - name: Configure Poetry (Linux) - if: runner.os == 'Linux' - run: | - poetry config installer.max-workers 4 - shell: bash - - - name: Load cached venv - id: cached-poetry-dependencies - uses: actions/cache@v3 - with: - path: .venv - key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}-${{ hashFiles('**/*.pyx') }} - restore-keys: | - venv-${{ runner.os }}-${{ matrix.python-version }}- - - - name: Install dependencies - if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - run: poetry install --no-interaction --with dev - env: - PATH: ${{ env.PATH }};${{ env.APPDATA }}\Python\Scripts - timeout-minutes: 15 - - - name: Clean Cython build - shell: bash - run: | - rm -rf build/ - rm -rf *.so - find . -name "*.c" -delete || true - - - name: Force Cython recompilation - run: poetry run pip install -e . 
--no-deps --force-reinstall - - - name: Install test dependencies - run: | - poetry run pip install pytest-html pytest-cov - shell: bash - - - name: Run tests - run: | - mkdir -p test-results - poetry run pytest tests/ -v --durations=10 --cache-clear \ - --junitxml=test-results/junit.xml \ - --html=test-results/report.html \ - --cov=chython \ - --cov-report=html:test-results/coverage \ - --cov-report=xml:test-results/coverage.xml - - - name: Upload test results - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-results-${{ matrix.os }}-py${{ matrix.python-version }} - path: | - test-results/ - .pytest_cache/ - retention-days: 7 - - - name: Upload coverage to Codecov - if: runner.os == 'Linux' && matrix.python-version == '3.11' - uses: codecov/codecov-action@v3 - with: - file: test-results/coverage.xml - flags: unittests - name: codecov-umbrella - fail_ci_if_error: false \ No newline at end of file diff --git a/README.rst b/README.rst index bbe8d4e8..0040e8b7 100644 --- a/README.rst +++ b/README.rst @@ -31,8 +31,6 @@ Install Only python 3.8+. -Note: for using `clean2d` install NodeJS into system. - * **stable version available through PyPI**:: pip install chython diff --git a/build.py b/build.py index f43339df..6f97641d 100644 --- a/build.py +++ b/build.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -48,14 +48,11 @@ Extension('chython.algorithms._isomorphism', ['chython/algorithms/_isomorphism.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._pack', - ['chython/containers/_pack.pyx'], + Extension('chython.containers._pack_v2', + ['chython/containers/_pack_v2.pyx'], extra_compile_args=extra_compile_args), - Extension('chython.containers._unpack', - ['chython/containers/_unpack.pyx'], - extra_compile_args=extra_compile_args), - Extension('chython.containers._cpack', - ['chython/containers/_cpack.pyx'], + Extension('chython.containers._unpack_v0v2', + ['chython/containers/_unpack_v0v2.pyx'], extra_compile_args=extra_compile_args), Extension('chython.files._xyz', ['chython/files/_xyz.pyx'], diff --git a/chython/__init__.py b/chython/__init__.py index 0c860191..b695b7b2 100644 --- a/chython/__init__.py +++ b/chython/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2014-2019 Timur Madzhidov tmadzhidov@gmail.com features and API discussion # Copyright 2014-2019 Alexandre Varnek base idea of CGR approach # This file is part of chython. @@ -25,7 +25,7 @@ from .utils import * -pickle_cache = False # store cached attributes in pickle torch_device = 'cpu' # AAM model device. Change before first `reset_mapping` call! + __all__ = [] diff --git a/chython/algorithms/_isomorphism.pyx b/chython/algorithms/_isomorphism.pyx index f701f4e5..3d39f200 100644 --- a/chython/algorithms/_isomorphism.pyx +++ b/chython/algorithms/_isomorphism.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # Copyright 2021 Aleksandr Sizov # This file is part of chython. 
# @@ -24,52 +24,85 @@ from libc.string cimport memset cdef extern from "Python.h": dict _PyDict_NewPresized(Py_ssize_t minused) +cdef packed struct atom_t: + unsigned long long bits1 + unsigned long long bits2 + unsigned long long bits3 + unsigned long long bits4 + unsigned int from_ + unsigned int to_ + unsigned int mapping + +cdef packed struct bond_t: + unsigned long long bond + unsigned int index + +cdef packed struct molecule_t: + unsigned int atoms_count + atom_t *atoms + bond_t *bonds + +cdef packed struct q_atom_t: + unsigned long long mask1 + unsigned long long mask2 + unsigned long long mask3 + unsigned long long mask4 + unsigned int back + unsigned int closure + unsigned int from_ + unsigned int to_ + unsigned int mapping + +cdef packed struct query_t: + unsigned int atoms_count + q_atom_t *atoms + bond_t *bonds + @cython.boundscheck(False) @cython.wraparound(False) -def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back not None, - unsigned long long[::1] q_masks1 not None, unsigned long long[::1] q_masks2 not None, - unsigned long long[::1] q_masks3 not None, unsigned long long[::1] q_masks4 not None, - unsigned int[::1] q_closures not None, unsigned int[::1] q_from not None, - unsigned int[::1] q_to not None, unsigned int[::1] q_indices not None, - unsigned long long[::1] q_bonds not None, unsigned long[::1] o_numbers not None, - unsigned long long[::1] o_bits1 not None, unsigned long long[::1] o_bits2 not None, - unsigned long long[::1] o_bits3 not None, unsigned long long[::1] o_bits4 not None, - unsigned long long[::1] o_bonds not None, unsigned int[::1] o_from not None, - unsigned int[::1] o_to not None, unsigned int[::1] o_indices not None, - unsigned int[::1] scope not None): +def get_mapping(const unsigned char[::1] q_buffer not None, const unsigned char[::1] m_buffer not None, + const unsigned int[::1] scope not None): # expected less than 2^16 atoms in structure. 
- cdef unsigned int stack = 0, path_size = 0, q_size, q_size_dec, o_size, depth, front, back, closures_num - cdef unsigned int n, m, o, i, j, closures_counter - cdef unsigned long long q_mask1, q_mask2, q_mask3, q_mask4, o_bond, c_bond + cdef unsigned int stack = 0, path_size = 0, depth, front, q_decrement + cdef unsigned int n, m, i, j, closures_counter + cdef unsigned long long c_bond cdef dict mapping - - q_size = len(q_numbers) - q_size_dec = q_size - 1 - o_size = len(o_numbers) - cdef unsigned int *path = PyMem_Malloc(q_size_dec * sizeof(unsigned int)) - cdef unsigned int *stack_index = PyMem_Malloc(2 * o_size * sizeof(unsigned int)) - cdef unsigned int *stack_depth = PyMem_Malloc(2 * o_size * sizeof(unsigned int)) - cdef bint *matched = PyMem_Malloc(o_size * sizeof(bint)) - cdef unsigned long long *o_closures = PyMem_Malloc(o_size * sizeof(unsigned long long)) - - if not path or not stack_index or not stack_depth or not matched or not o_closures: + cdef query_t query + cdef molecule_t molecule + cdef q_atom_t q_atom + cdef atom_t n_atom, m_atom + cdef bond_t i_bond, j_bond + + query.atoms_count = ( &q_buffer[0])[0] + query.atoms = (&q_buffer[0] + 4) + query.bonds = (&query.atoms[0] + query.atoms_count) + q_decrement = query.atoms_count - 1 + + molecule.atoms_count = ( &m_buffer[0])[0] + molecule.atoms = (&m_buffer[0] + 4) + molecule.bonds = (&molecule.atoms[0] + molecule.atoms_count) + + cdef unsigned int *path = PyMem_Malloc(q_decrement * sizeof(unsigned int)) + cdef unsigned int *stack_index = PyMem_Malloc(2 * molecule.atoms_count * sizeof(unsigned int)) + cdef unsigned int *stack_depth = PyMem_Malloc(2 * molecule.atoms_count * sizeof(unsigned int)) + cdef bint *matched = PyMem_Malloc(molecule.atoms_count * sizeof(bint)) + cdef unsigned long long *closures = PyMem_Malloc(molecule.atoms_count * sizeof(unsigned long long)) + + if not path or not stack_index or not stack_depth or not matched or not closures: raise MemoryError() - memset(matched, 0, o_size * 
sizeof(bint)) - memset(o_closures, 0, o_size * sizeof(unsigned long long)) + memset(matched, 0, molecule.atoms_count * sizeof(bint)) + memset(closures, 0, molecule.atoms_count * sizeof(unsigned long long)) - # find entry-points. - q_mask1 = q_masks1[0] - q_mask2 = q_masks2[0] - q_mask3 = q_masks3[0] - q_mask4 = q_masks4[0] - for n in range(o_size): + q_atom = query.atoms[0] + for n in range(molecule.atoms_count): + n_atom = molecule.atoms[n] if (scope[n] and - q_mask1 & o_bits1[n] and # o_bits1 doesn't contain bond bits. - q_mask2 & o_bits2[n] == o_bits2[n] and - q_mask3 & o_bits3[n] == o_bits3[n] and - q_mask4 & o_bits4[n]): + q_atom.mask1 & n_atom.bits1 and # bits1 doesn't contain bond bits. + q_atom.mask2 & n_atom.bits2 == n_atom.bits2 and + q_atom.mask3 & n_atom.bits3 == n_atom.bits3 and + q_atom.mask4 & n_atom.bits4): stack_index[stack] = n stack_depth[stack] = 0 @@ -81,11 +114,11 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back depth = stack_depth[stack] n = stack_index[stack] - if depth == q_size_dec: - mapping = _PyDict_NewPresized(q_size) + if depth == q_decrement: + mapping = _PyDict_NewPresized(query.atoms_count) for i in range(depth): - mapping[q_numbers[i]] = o_numbers[path[i]] - mapping[q_numbers[depth]] = o_numbers[n] + mapping[query.atoms[i].mapping] = molecule.atoms[path[i]].mapping + mapping[query.atoms[depth].mapping] = molecule.atoms[n].mapping yield mapping else: if path_size != depth: # dead end reached @@ -98,40 +131,37 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back path_size += 1 front = depth + 1 - back = q_back[front] - if back != depth: # branch - n = path[back] - # load next query atom - q_mask1 = q_masks1[front] - q_mask2 = q_masks2[front] - q_mask3 = q_masks3[front] - q_mask4 = q_masks4[front] - closures_num = q_closures[front] - - for i in range(o_from[n], o_to[n]): - o_bond = o_bonds[i] - m = o_indices[i] + q_atom = query.atoms[front] + if q_atom.back != depth: # 
branch + n = path[q_atom.back] + + n_atom = molecule.atoms[n] + for i in range(n_atom.from_, n_atom.to_): + i_bond = molecule.bonds[i] + m = i_bond.index + m_atom = molecule.atoms[m] if (scope[m] and not matched[m] and - q_mask1 & o_bond == o_bond and # bond order, in ring mark and atom bit should match. - q_mask2 & o_bits2[m] == o_bits2[m] and - q_mask3 & o_bits3[m] == o_bits3[m] and - q_mask4 & o_bits4[m]): + q_atom.mask1 & i_bond.bond == i_bond.bond and # bond order, in ring mark and atom bit should match. + q_atom.mask2 & m_atom.bits2 == m_atom.bits2 and + q_atom.mask3 & m_atom.bits3 == m_atom.bits3 and + q_atom.mask4 & m_atom.bits4): - if closures_num: # candidate atom should have same closures. + if q_atom.closure: # candidate atom should have same closures. closures_counter = 0 # make a map of closures for o_n atom # an index is a neighbor atom and a value is a bond between o_n and the neighbor - for j in range(o_from[m], o_to[m]): - o = o_indices[j] - if o != n and matched[o]: - o_closures[o] = o_bonds[j] + for j in range(m_atom.from_, m_atom.to_): + j_bond = molecule.bonds[j] + if j_bond.index != n and matched[j_bond.index]: + closures[j_bond.index] = j_bond.bond closures_counter += 1 - if closures_counter == closures_num: - for j in range(q_from[front], q_to[front]): - c_bond = o_closures[path[q_indices[j]]] - if not c_bond or q_bonds[j] & c_bond != c_bond: # compare order and ring bits + if closures_counter == q_atom.closure: + for j in range(q_atom.from_, q_atom.to_): + j_bond = query.bonds[j] + c_bond = closures[path[j_bond.index]] + if not c_bond or j_bond.bond & c_bond != c_bond: # compare order and ring bits break else: stack_index[stack] = m @@ -139,12 +169,13 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back stack += 1 # fill an array with nulls - for j in range(o_from[m], o_to[m]): - o_closures[o_indices[j]] = 0 + for j in range(m_atom.from_, m_atom.to_): + j_bond = molecule.bonds[j] + closures[j_bond.index] = 0 
else: # candidate atom should not have closures. - for j in range(o_from[m], o_to[m]): - o = o_indices[j] - if o != n and matched[o]: + for j in range(m_atom.from_, m_atom.to_): + j_bond = molecule.bonds[j] + if j_bond.index != n and matched[j_bond.index]: break # found closure else: stack_index[stack] = m @@ -155,4 +186,4 @@ def get_mapping(unsigned long[::1] q_numbers not None, unsigned int[::1] q_back PyMem_Free(matched) PyMem_Free(stack_index) PyMem_Free(stack_depth) - PyMem_Free(o_closures) + PyMem_Free(closures) diff --git a/chython/algorithms/aromatics/_rules.py b/chython/algorithms/aromatics/_rules.py index 02b061aa..49b69cd6 100644 --- a/chython/algorithms/aromatics/_rules.py +++ b/chython/algorithms/aromatics/_rules.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -104,7 +104,21 @@ def _rules(): return rules +def _freaks(): + from ... import smarts + + rules = [] + + q = smarts('[N,O,S;D2;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') + rules.append(q) + + q = smarts('[N;D3;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') + rules.append(q) + return rules + + rules = Proxy(_rules) +freak_rules = Proxy(_freaks) -__all__ = ['rules'] +__all__ = ['rules', 'freak_rules'] diff --git a/chython/algorithms/aromatics/kekule.py b/chython/algorithms/aromatics/kekule.py index ef9834e9..7f3cbd6c 100644 --- a/chython/algorithms/aromatics/kekule.py +++ b/chython/algorithms/aromatics/kekule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -27,6 +27,18 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +P = 15 +S = 16 +As = 33 +Se = 34 +Te = 52 + + class Kekule: __slots__ = () @@ -46,12 +58,13 @@ def kekule(self: Union['Kekule', 'MoleculeContainer'], *, buffer_size=7) -> bool bonds = self._bonds atoms = set() for n, m, b in kekule: - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b atoms.add(n) atoms.add(m) for n in atoms: - self._calc_implicit(n) - self.flush_cache() + self.calc_implicit(n) + self.flush_cache(keep_sssr=True, keep_components=True) + self.calc_labels() return True return fixed @@ -61,21 +74,23 @@ def enumerate_kekule(self: Union['Kekule', 'MoleculeContainer']): """ self.__fix_rings() # fix bad aromatic rings for form in self.__kekule_full(0): - copy = self.copy() + copy = self.copy(keep_sssr=True, keep_components=True) bonds = copy._bonds atoms = set() for n, m, b in form: - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b atoms.add(n) atoms.add(m) for n in atoms: - copy._calc_implicit(n) + copy.calc_implicit(n) + copy.calc_labels() yield copy def __fix_rings(self: 'MoleculeContainer'): + atoms = self._atoms bonds = self._bonds - charges = self._charges seen = set() + keep = True for q, af, bf, mm in rules: for mapping in q.get_mapping(self, automorphism_filter=False): match = set(mapping.values()) @@ -85,23 +100,23 @@ def __fix_rings(self: 'MoleculeContainer'): for n, c in af.items(): n = mapping[n] - charges[n] = c + atoms[n]._charge = c for n, m, b in bf: n = mapping[n] m = mapping[m] - bonds[n][m]._Bond__order = b # noqa + bonds[n][m]._order = b + if b == 8: + # flush sssr and components cache + keep = False if seen: - self.flush_cache() + self.flush_cache(keep_sssr=keep, keep_components=keep) + self.calc_labels() return True return False def __prepare_rings(self: 'MoleculeContainer'): atoms = self._atoms - charges = self._charges - radicals = 
self._radicals bonds = self._bonds - hydrogens = self._hydrogens - neighbors = self.neighbors rings = defaultdict(list) # aromatic skeleton pyrroles = set() @@ -110,12 +125,11 @@ def __prepare_rings(self: 'MoleculeContainer'): triple_bonded = set() for n, m_bond in bonds.items(): for m, bond in m_bond.items(): - bo = bond.order - if bo == 4: + if bond == 4: rings[n].append(m) - elif bo == 2: + elif bond == 2: double_bonded[n].append(m) - elif bo == 3: + elif bond == 3: triple_bonded.add(n) if not rings: @@ -157,7 +171,7 @@ def __prepare_rings(self: 'MoleculeContainer'): if m not in seen: rings[n].remove(m) rings[m].remove(n) - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 if any(len(ms) not in (2, 3) for ms in rings.values()): raise InvalidAromaticRing('not in ring aromatic bond or hypercondensed rings: ' @@ -168,133 +182,128 @@ def __prepare_rings(self: 'MoleculeContainer'): if any(len(rings[n]) != 2 for n in double_bonded): # double bonded never condensed raise InvalidAromaticRing('quinone valence error') for n in double_bonded: - if atoms[n].atomic_number == 7: - if charges[n] != 1: + if (atom := atoms[n]) == N: + if atom.charge != 1: raise InvalidAromaticRing('quinone should be charged N atom') - elif atoms[n].atomic_number not in (6, 15, 16, 33, 34, 52) or charges[n]: + elif atom not in (C, P, S, As, Se, Te) or atom.charge: raise InvalidAromaticRing('quinone should be neutral S, Se, Te, C, P, As atom') for n in rings: - an = atoms[n].atomic_number - ac = charges[n] - ab = neighbors(n) - if an == 6: # carbon - if ac == 0: - if ab not in (2, 3): + if (atom := atoms[n]) == C: # carbon + if atom.charge == 0: + if atom.neighbors not in (2, 3): raise InvalidAromaticRing - elif ac in (-1, 1): - if radicals[n]: - if ab == 2: + elif atom.charge in (-1, 1): + if atom.is_radical: + if atom.neighbors == 2: double_bonded.add(n) else: raise InvalidAromaticRing - elif ab == 3: + elif atom.neighbors == 3: double_bonded.add(n) - elif ab == 2: # benzene 
(an|cat)ion or pyrrole + elif atom.neighbors == 2: # benzene (an|cat)ion or pyrrole pyrroles.add(n) else: raise InvalidAromaticRing else: raise InvalidAromaticRing - elif an in (7, 15, 33): - if ac == 0: # pyrrole or pyridine. include radical pyrrole - if radicals[n]: - if ab != 2: # only pyrrole radical + elif atom in (N, P, As): + if atom.charge == 0: # pyrrole or pyridine. include radical pyrrole + if atom.is_radical: + if atom.neighbors != 2: # only pyrrole radical raise InvalidAromaticRing double_bonded.add(n) - elif ab == 3: - if an == 7: # pyrrole only possible + elif atom.neighbors == 3: + if atom == N: # pyrrole only possible double_bonded.add(n) else: # P(III) or P(V)H pyrroles.add(n) - elif ab == 2: - ah = hydrogens[n] - if ah is None: # pyrrole or pyridine + elif atom.neighbors == 2: + if atom.implicit_hydrogens is None: # pyrrole or pyridine pyrroles.add(n) - elif ah == 1: # only pyrrole + elif atom.implicit_hydrogens == 1: # only pyrrole double_bonded.add(n) - elif ah: # too many hydrogens for aromatic rings + elif atom.implicit_hydrogens: # too many hydrogens for aromatic rings raise InvalidAromaticRing - elif ab != 4 or an not in (15, 33): # P(V) in ring [P;a](-R1)-R2 + elif atom.neighbors != 4 or atom not in (P, As): # P(V) in ring [P;a](-R1)-R2 raise InvalidAromaticRing - elif ac == -1: # pyrrole only - if ab != 2 or radicals[n]: + elif atom.charge == -1: # pyrrole only + if atom.neighbors != 2 or atom.is_radical: raise InvalidAromaticRing double_bonded.add(n) - elif ac != 1: + elif atom.charge != 1: raise InvalidAromaticRing - elif radicals[n]: - if ab != 2: # not cation-radical pyridine + elif atom.is_radical: + if atom.neighbors != 2: # not cation-radical pyridine raise InvalidAromaticRing - elif ab == 2: # pyrrole cation or protonated pyridine + elif atom.neighbors == 2: # pyrrole cation or protonated pyridine pyrroles.add(n) - elif ab != 3: # not pyridine oxyde + elif atom.neighbors != 3: # not pyridine oxyde raise InvalidAromaticRing - elif 
an == 8: # furan - if ab == 2: - if ac == 0: - if radicals[n]: + elif atom == O: # furan + if atom.neighbors == 2: + if atom.charge == 0: + if atom.is_radical: raise InvalidAromaticRing('radical oxygen') double_bonded.add(n) - elif ac == 1: - if radicals[n]: # furan cation-radical + elif atom.charge == 1: + if atom.is_radical: # furan cation-radical double_bonded.add(n) # pyrylium else: raise InvalidAromaticRing('invalid oxygen charge') else: raise InvalidAromaticRing('Triple-bonded oxygen') - elif an in (16, 34, 52): # thiophene + elif atom in (S, Se, Te): # thiophene if n not in double_bonded: # not sulphoxyde nor sulphone - if ab == 2: - if radicals[n]: - if ac == 1: + if atom.neighbors == 2: + if atom.is_radical: + if atom.charge == 1: double_bonded.add(n) else: raise InvalidAromaticRing('S, Se, Te cation-radical expected') - if ac == 0: + if atom.charge == 0: double_bonded.add(n) - elif ac != 1: + elif atom.charge != 1: raise InvalidAromaticRing('S, Se, Te cation in benzene like ring expected') - elif ab == 3: - if radicals[n]: - if ac: + elif atom.neighbors == 3: + if atom.is_radical: + if atom.charge: raise InvalidAromaticRing('S, Se, Te ion-radical ring') double_bonded.add(n) - elif ac == 1: + elif atom.charge == 1: double_bonded.add(n) - elif ac: + elif atom.charge: raise InvalidAromaticRing('S, Se, Te invalid charge ring') else: raise InvalidAromaticRing('S, Se, Te hypervalent ring') - elif an == 5: # boron - if ac == 0: - if ab == 2: - if radicals[n]: # C=1O[B]OC=1 + elif atom == B: + if atom.charge == 0: + if atom.neighbors == 2: + if atom.is_radical: # C=1O[B]OC=1 double_bonded.add(n) else: - ah = hydrogens[n] - if ah is None: # b1ccccc1, C=1OBOC=1 or B1C=CC=N1 + if atom.implicit_hydrogens is None: # b1ccccc1, C=1OBOC=1 or B1C=CC=N1 pyrroles.add(n) - elif ah == 1: # C=1O[BH]OC=1 or [BH]1C=CC=N1 + elif atom.implicit_hydrogens == 1: # C=1O[BH]OC=1 or [BH]1C=CC=N1 double_bonded.add(n) - elif ah: + elif atom.implicit_hydrogens: raise InvalidAromaticRing - 
elif not radicals[n]: + elif not atom.is_radical: double_bonded.add(n) else: raise InvalidAromaticRing - elif ac == 1: - if ab == 2 and not radicals[n]: + elif atom.charge == 1: + if atom.neighbors == 2 and not atom.is_radical: double_bonded.add(n) else: raise InvalidAromaticRing - elif ac == -1: - if ab == 2: - if not radicals[n]: # C=1O[B-]OC=1 or [bH-]1ccccc1 + elif atom.charge == -1: + if atom.neighbors == 2: + if not atom.is_radical: # C=1O[B-]OC=1 or [bH-]1ccccc1 pyrroles.add(n) # anion-radical is benzene like - elif radicals[n]: # C=1O[B-*](R)OC=1 + elif atom.is_radical: # C=1O[B-*](R)OC=1 double_bonded.add(n) else: pyrroles.add(n) diff --git a/chython/algorithms/stereo/__init__.py b/chython/algorithms/aromatics/test/__init__.py similarity index 83% rename from chython/algorithms/stereo/__init__.py rename to chython/algorithms/aromatics/test/__init__.py index 18f784a7..031c963a 100644 --- a/chython/algorithms/stereo/__init__.py +++ b/chython/algorithms/aromatics/test/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021 Ramil Nugmanov +# Copyright 2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,8 +16,3 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .graph import * -from .molecule import * - - -__all__ = ['MoleculeStereo', 'Stereo'] diff --git a/tests/algorithms/aromatics/test_kekule.py b/chython/algorithms/aromatics/test/test_kekule.py similarity index 50% rename from tests/algorithms/aromatics/test_kekule.py rename to chython/algorithms/aromatics/test/test_kekule.py index 7465b819..be33e701 100644 --- a/tests/algorithms/aromatics/test_kekule.py +++ b/chython/algorithms/aromatics/test/test_kekule.py @@ -18,137 +18,143 @@ # along with this program; if not, see . 
# from chython import smiles -import pytest from chython.exceptions import InvalidAromaticRing +from pytest import raises def test_kekule_basic(): # Test basic aromatic ring conversion mol = smiles('c1ccccc1') # benzene assert mol.kekule() # should return True for aromatic rings - - # Verify alternating single and double bonds - bonds = mol._bonds - double_bonds = sum(1 for n, ms in bonds.items() for m, b in ms.items() if b.order == 2 and m > n) - assert double_bonds == 3 # benzene should have 3 double bonds + assert mol == smiles('C1=CC=CC=C1') def test_kekule_pyridine(): # Test pyridine and its derivatives mol = smiles('n1ccccc1') # pyridine assert mol.kekule() - + assert mol == smiles('N1=CC=CC=C1') + assert mol.atom(1).implicit_hydrogens == 0 + # Test protonated pyridine - mol_protonated = smiles('[nH+]1ccccc1') - assert mol_protonated.kekule() + mol = smiles('[nH+]1ccccc1') + assert mol.kekule() + assert mol == smiles('[NH+]1=CC=CC=C1') def test_kekule_pyrrole(): # Test pyrrole and its derivatives mol = smiles('[nH]1cccc1') # pyrrole assert mol.kekule() - + assert mol == smiles('N1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 1 + + mol = smiles('n1cccc1') + assert mol.kekule() + assert mol == smiles('N1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 1 + # Test N-methylpyrrole - mol_methyl = smiles('Cn1cccc1') - assert mol_methyl.kekule() + mol = smiles('Cn1cccc1') + assert mol.kekule() + assert mol == smiles('CN1C=CC=C1') + assert mol.atom(2).implicit_hydrogens == 0 def test_kekule_furan_thiophene(): # Test oxygen and sulfur containing aromatics - mol_furan = smiles('o1cccc1') - assert mol_furan.kekule() - - mol_thiophene = smiles('s1cccc1') - assert mol_thiophene.kekule() + mol = smiles('o1cccc1') + assert mol.kekule() + assert mol == smiles('O1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 0 + + mol = smiles('s1cccc1') + assert mol.kekule() + assert mol == smiles('S1C=CC=C1') + assert mol.atom(1).implicit_hydrogens == 0 def 
test_kekule_complex_systems(): # Test fused ring systems - mol_naphthalene = smiles('c1ccc2ccccc2c1') - assert mol_naphthalene.kekule() - + mol = smiles('c1ccc2ccccc2c1') + assert mol.kekule() + assert mol == smiles('C1=CC2=CC=CC=C2C=C1') or mol == smiles('C1=CC2=C(C=C1)C=CC=C2') + # Test indole - mol_indole = smiles('c1ccc2[nH]ccc2c1') - assert mol_indole.kekule() + mol = smiles('c1ccc2[nH]ccc2c1') + assert mol.kekule() + assert mol == smiles('N1C=CC2=C1C=CC=C2') or mol == smiles('N1C=CC2=CC=CC=C12') def test_kekule_enumeration(): - # Test enumeration of Kekulé structures - mol = smiles('c1ccccc1') # benzene + mol = smiles('Cc1ccccc1C') forms = list(mol.enumerate_kekule()) - assert len(forms) == 2 # benzene has 2 Kekulé forms + assert len(forms) == 2 # benzene has 2 Kekule forms + assert smiles('CC1=C(C)C=CC=C1') in forms + assert smiles('CC1=CC=CC=C1C') in forms def test_kekule_invalid_structures(): # Test invalid aromatic structures - with pytest.raises(InvalidAromaticRing): + with raises(InvalidAromaticRing): mol = smiles('c1cccc1') # 5-membered carbon ring (invalid aromatic) mol.kekule() - - with pytest.raises(InvalidAromaticRing): + + with raises(InvalidAromaticRing): mol = smiles('c1ccc2c1c3ccccc3cc2') # acenaphthalene (invalid aromatic form) mol.kekule() - - with pytest.raises(InvalidAromaticRing): + + with raises(InvalidAromaticRing): mol = smiles('c1cccc1C(=O)c1cccc1') # cyclopentadiene with carbonyl (invalid aromatic) mol.kekule() def test_kekule_charged_species(): # Test charged aromatic species - mol_pyridinium = smiles('[n+]1ccccc1') - assert mol_pyridinium.kekule() - - mol_cyclopentadienyl = smiles('[cH-]1cccc1') - assert mol_cyclopentadienyl.kekule() + mol = smiles('[n+]1ccccc1') + assert mol.kekule() + assert mol == smiles('C=1[NH+]=CC=CC=1') + + mol = smiles('[cH-]1cccc1') + assert mol.kekule() + assert mol == smiles('C=1C=C[CH-]C=1') def test_kekule_multiple_rings(): # Test molecules with multiple aromatic rings - mol_biphenyl = 
smiles('c1ccccc1-c2ccccc2') - assert mol_biphenyl.kekule() - - # Test phenylpyridine - mol_phenylpyridine = smiles('c1ccccc1-c2ccccn2') - assert mol_phenylpyridine.kekule() + mol = smiles('c1ccccc1c2ccccc2') + assert mol.kekule() + assert mol == smiles('C1=CC=C(C=C1)C1=CC=CC=C1') def test_kekule_heteroatoms(): # Test various heteroatoms in aromatic rings - mol_pyrazine = smiles('n1ccncc1') # two nitrogens - assert mol_pyrazine.kekule() - - mol_oxazole = smiles('o1cncc1') # oxygen and nitrogen - assert mol_oxazole.kekule() - - mol_thiazole = smiles('s1cncc1') # sulfur and nitrogen - assert mol_thiazole.kekule() + mol = smiles('c1cncn1') # two nitrogens + assert mol.kekule() + assert mol == smiles('N1C=CN=C1') + + mol = smiles('o1cncc1') # oxygen and nitrogen + assert mol.kekule() + assert mol == smiles('C1=COC=N1') def test_kekule_buffer_size(): # Test buffer size parameter for complex heterocycles - mol1 = smiles('c1ccc2[nH]ccc2c1') # indole - assert mol1.kekule(buffer_size=1) # small buffer - - mol2 = smiles('c1ccc2[nH]ccc2c1') # fresh indole instance - assert mol2.kekule(buffer_size=10) # large buffer + mol = smiles('c1ccc2[nH]ccc2c1') # indole + assert mol.kekule(buffer_size=1) # small buffer + + mol = smiles('c1ccc2[nH]ccc2c1') # fresh indole instance + assert mol.kekule(buffer_size=10) # large buffer def test_kekule_radical_species(): - # Test radical aromatic species - mol_phenoxy = smiles('[O]c1ccccc1') - assert mol_phenoxy.kekule() - - # Test radical cation - mol_benzene_radical = smiles('[c]1ccccc1') - assert mol_benzene_radical.kekule() + mol = smiles('[c]1ccccc1') + assert mol.kekule() + assert mol == smiles('C=1C=CC=[C]C=1 |^1:4|') def test_kekule_quinones(): # Test quinone-like structures - mol_benzoquinone = smiles('O=C1C=CC(=O)C=C1') - assert not mol_benzoquinone.kekule() # not aromatic - - # Test semiquinone - mol_semiquinone = smiles('O=C1C=CC(O)C=C1') - assert not mol_semiquinone.kekule() # not aromatic \ No newline at end of file + mol = 
smiles('O=c1ccc(=O)cc1') + assert mol.kekule() + assert mol == smiles('C1=CC(C=CC1=O)=O') diff --git a/tests/algorithms/aromatics/test_thiele.py b/chython/algorithms/aromatics/test/test_thiele.py similarity index 66% rename from tests/algorithms/aromatics/test_thiele.py rename to chython/algorithms/aromatics/test/test_thiele.py index 5eb19ba5..a8b7d087 100644 --- a/tests/algorithms/aromatics/test_thiele.py +++ b/chython/algorithms/aromatics/test/test_thiele.py @@ -24,7 +24,7 @@ def test_basic_thiele(): # Test basic aromatic systems mol = smiles('C1=CC=CC=C1') # benzene in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic (order 4) for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -34,7 +34,7 @@ def test_pyrrole_thiele(): # Test pyrrole-like systems mol = smiles('N1C=CC=C1') # pyrrole in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -44,7 +44,7 @@ def test_furan_thiele(): # Test furan-like systems mol = smiles('O1C=CC=C1') # furan in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -54,7 +54,7 @@ def test_thiophene_thiele(): # Test thiophene-like systems mol = smiles('S1C=CC=C1') # thiophene in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -64,7 +64,7 @@ def test_condensed_rings(): # Test condensed ring systems mol = smiles('C1=CC=C2C=CC=CC2=C1') # naphthalene in Kekule form assert mol.thiele() - + # Check that all bonds in both rings are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -75,37 +75,34 @@ def test_tautomer_fix(): mol = smiles('N1C=CC2=NC=CC2=C1') # before fix assert mol.thiele(fix_tautomers=True) - # After tautomer fixing, one N should have 0 H and the other should have 1 H - n_with_h = 0 - n_without_h = 0 - for n, 
atom in mol.atoms(): - if atom.atomic_number == 7: # nitrogen - assert len(mol._bonds[n]) == 2 # should have 2 bonds - if mol._hydrogens[n] == 1: - n_with_h += 1 - elif mol._hydrogens[n] == 0: - n_without_h += 1 - - assert n_with_h == 1 # one nitrogen should have 1 hydrogen - assert n_without_h == 1 # one nitrogen should have 0 hydrogens + for n, m, bond in mol.bonds(): + assert bond.order == 4 + + assert mol.atom(1).implicit_hydrogens == 0 + assert mol.atom(5).implicit_hydrogens == 1 + + mol = smiles('N1C=CC2=NC=CC2=C1') # before fix + assert mol.thiele(fix_tautomers=False) + + for n, m, bond in mol.bonds(): + assert bond.order == 4 + + assert mol.atom(1).implicit_hydrogens == 1 + assert mol.atom(5).implicit_hydrogens == 0 def test_quinone_exclusion(): # Test that quinone-like structures are not aromatized mol = smiles('O=C1C=CC(=O)C=C1') # para-benzoquinone - initial_double_bonds = [(n, m) for n, m, bond in mol.bonds() if bond.order == 2] assert not mol.thiele() # should return False - - # Check that double bonds remain as double bonds - final_double_bonds = [(n, m) for n, m, bond in mol.bonds() if bond.order == 2] - assert len(final_double_bonds) == len(initial_double_bonds) + assert mol == smiles('O=C1C=CC(=O)C=C1') def test_invalid_rings(): # Test that invalid rings are not aromatized mol = smiles('C1=CC=C1') # cyclobutadiene - too small assert not mol.thiele() # should return False - + mol = smiles('C1=CC=CC=CC=CC=C1') # 10-membered ring - too large assert not mol.thiele() # should return False @@ -114,21 +111,20 @@ def test_charged_systems(): # Test charged aromatic systems mol = smiles('[NH+]1C=CC=CC=1') # pyridinium in Kekule form assert mol.thiele() - + # Check that charge is preserved and ring is aromatic - for n, atom in mol.atoms(): - if atom.atomic_number == 7: # nitrogen - assert mol._charges[n] == 1 # should have +1 charge - + assert mol.atom(1).charge == 1 + assert mol.atom(1).implicit_hydrogens == 1 + for n, m, bond in mol.bonds(): assert 
bond.order == 4 # all bonds should be aromatic def test_freak_rules(): # Test special cases handled by freak rules - mol = smiles('N1C=CN2C=CC=CC1=2') # special N-fused system in Kekule form + mol = smiles('N1C=CN2C=CC=C12') # special N-fused system in Kekule form assert mol.thiele() - + # Check that all bonds are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -137,29 +133,15 @@ def test_freak_rules(): def test_tetracyclic_systems(): # Test 4-membered ring in condensed systems mol = smiles('C1=CC2=C(C=CC=C2)C1') # benzocyclobutene - result = mol.thiele() - - # Only the 6-membered ring should be aromatic - aromatic_bonds = 0 - rings_by_size = {len(ring): ring for ring in mol.sssr} - - for n, m, bond in mol.bonds(): - if bond.order == 4: # aromatic bond - aromatic_bonds += 1 - # Check that this bond belongs to the 6-membered ring - if 6 in rings_by_size: - six_ring = rings_by_size[6] - assert n in six_ring and m in six_ring - - assert aromatic_bonds == 6 # six aromatic bonds in benzene ring - assert result # should return True since part of the molecule is aromatic + assert mol.thiele() + assert mol == smiles('C1C=Cc2ccccc12') def test_phosphorus_rings(): # Test phosphorus-containing aromatic rings mol = smiles('P1C=CC=CC=1') # phosphabenzene in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -169,7 +151,7 @@ def test_boron_rings(): # Test boron-containing aromatic rings mol = smiles('B1C=CC=C1') # borole in Kekule form assert mol.thiele() - + # Check that all bonds in the ring are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 @@ -180,46 +162,43 @@ def test_seven_membered_rings(): # Only boron-containing 7-membered rings should be aromatic mol = smiles('B1C=CC=CC=C1') # 7-membered ring with boron assert mol.thiele() - + mol = smiles('N1C=CC=CC=C1') # 7-membered ring with nitrogen assert not mol.thiele() def test_ferrocene_like(): # Test 
negatively charged carbon systems (ferrocene-like) - mol = smiles('[C-]1C=CC=C1') # cyclopentadienyl anion + mol = smiles('[CH-]1C=CC=C1') # cyclopentadienyl anion assert mol.thiele() - - # Check charge preservation and aromaticity - charged_carbons = 0 - for n, atom in mol.atoms(): - if atom.atomic_number == 6 and mol._charges[n] == -1: - charged_carbons += 1 - assert charged_carbons == 1 - + for n, m, bond in mol.bonds(): assert bond.order == 4 + assert int(mol) == -1 + assert mol.atom(1).charge == -1 + def test_multiple_components(): # Test systems with multiple aromatic components mol = smiles('C1=CC=CC=C1.C1=CC=CC=C1') # two benzene molecules in Kekule form assert mol.thiele() - + # Check that all bonds in both components are aromatic for n, m, bond in mol.bonds(): assert bond.order == 4 + assert mol.connected_components_count == 2 def test_complex_fused_systems(): # Test complex fused ring systems with multiple heteroatoms - + # Benzothiazole (simplified) mol = smiles('C1=CC=C2SC=NC2=C1') # benzothiazole in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Thienopyridine (simplified) mol = smiles('C1=CC=NC2=CSC=C12') # thienopyridine in Kekule form assert mol.thiele() @@ -229,88 +208,56 @@ def test_complex_fused_systems(): def test_complex_charged_systems(): # Test charged aromatic systems - + # Basic charged systems that should work # Pyridinium mol = smiles('[NH+]1C=CC=CC=1') # pyridinium in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + assert mol.atom(1).charge == 1 + assert mol.atom(1).implicit_hydrogens == 1 + # Complex charged systems that are not yet supported # N-methylpyridinium (currently not aromatized properly) mol = smiles('C[N+]1=CC=CC=C1') # N-methylpyridinium in Kekule form assert mol.thiele() + assert mol.atom(2).charge == 1 def test_complex_heterocycles(): # Test heterocyclic systems - + # Basic heterocycles that should work # Benzimidazole mol = 
smiles('C1=CC=C2NC=NC2=C1') # in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Quinoxaline mol = smiles('C1=CC=C2N=CC=NC2=C1') # in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Benzimidazole fused to thiophene (works) mol = smiles('C1=CC2=C(C=C1)N=CN2C3=CC=CS3') # in Kekule form - result = mol.thiele() - assert result # use stored result instead of calling thiele() again - - # Check bonds in each ring separately - benzene_ring = {1, 2, 3, 4, 5, 6} # benzene part - imidazole_ring = {3, 4, 7, 8, 9} # imidazole part - thiophene_ring = {10, 11, 12, 13, 14} # thiophene part - + assert mol.thiele() + for n, m, bond in mol.bonds(): # Check if both atoms of the bond are in the same ring - if (n in benzene_ring and m in benzene_ring) or \ - (n in imidazole_ring and m in imidazole_ring) or \ - (n in thiophene_ring and m in thiophene_ring): - assert bond.order == 4, f"Bond {n}-{m} should be aromatic" + assert bond.order == 4 or (bond.order == 1 and n in (9, 10) and m in (9, 10)) def test_complex_bridged_systems(): - # Test bridged aromatic systems - - # Basic bridged systems that should work - # Naphthalene - mol = smiles('C1=CC=C2C=CC=CC2=C1') # in Kekule form - assert mol.thiele() - for n, m, bond in mol.bonds(): - assert bond.order == 4 - - # Quinoline - mol = smiles('C1=CC=C2C=CC=NC2=C1') # in Kekule form - assert mol.thiele() - for n, m, bond in mol.bonds(): - assert bond.order == 4 - # Complex bridged system with multiple heteroatoms (works) mol = smiles('C1=CC2=C(C=C1)N=C3C(=C2)C=CC4=C3N=CS4') # in Kekule form assert mol.thiele() for n, m, bond in mol.bonds(): assert bond.order == 4 - + # Bridged system with N and S (works) mol = smiles('C1=CC2=C(C=C1)SC3=C(N=CC=C3)C=C2') # in Kekule form - result = mol.thiele() - assert result # use stored result instead of calling thiele() again - - # Check bonds in each ring separately - benzene_ring = {1, 2, 3, 4, 5, 6} # benzene part - 
pyridine_ring = {8, 9, 10, 11, 12, 13} # pyridine part - - for n, m, bond in mol.bonds(): - # Check if both atoms of the bond are in the same ring - if (n in benzene_ring and m in benzene_ring) or \ - (n in pyridine_ring and m in pyridine_ring): - assert bond.order == 4, f"Bond {n}-{m} should be aromatic" - # Bonds between rings or to S can remain non-aromatic \ No newline at end of file + assert mol.thiele() + assert mol == smiles('S1c2ccccc2C=Cc2[n]cccc12') diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index 43030a86..c6682247 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,8 +17,8 @@ # along with this program; if not, see . # from collections import defaultdict -from lazy_object_proxy import Proxy from typing import TYPE_CHECKING +from ._rules import freak_rules from ..rings import _sssr, _connected_components @@ -26,20 +26,14 @@ from chython import MoleculeContainer -def _freaks(): - from ... import smarts - - rules = [] - - q = smarts('[N,O,S;D2;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') - rules.append(q) - - q = smarts('[N;D3;r5;z1]1[A;r5]=,:[A;r5][A;r5]:[A;r5]1') - rules.append(q) - return rules - - -freak_rules = Proxy(_freaks) +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +P = 15 +S = 16 +Se = 34 class Thiele: @@ -56,9 +50,6 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: atoms = self._atoms bonds = self._bonds nsc = self.not_special_connectivity - sh = self.hybridization - charges = self._charges - hydrogens = self._hydrogens rings = defaultdict(set) # aromatic? skeleton. 
include quinones tetracycles = [] @@ -71,15 +62,15 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: if not 3 < lr < 8: # skip 3-membered and big rings continue # only B C N O P S with 2-3 neighbors. detects this: C1=CC=CP12=CC=CC=C2 - if any(atoms[n].atomic_number not in (6, 7, 8, 16, 5, 15) or len(nsc[n]) > 3 for n in ring): + if any(atoms[n] not in (C, N, O, S, B, P) or len(nsc[n]) > 3 for n in ring): continue - sp2 = sum(sh(n) == 2 for n in ring) + sp2 = sum(atoms[n].hybridization == 2 for n in ring) if sp2 == lr: # benzene like if lr == 4: # two bonds condensed aromatic rings tetracycles.append(ring) else: if fix_tautomers and lr % 2: # find potential pyrroles - acceptors.update(n for n in ring if atoms[n].atomic_number == 7 and not charges[n]) + acceptors.update(n for n in ring if (a := atoms[n]) == N and not a.charge) n, *_, m = ring rings[n].add(m) rings[m].add(n) @@ -88,28 +79,27 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: rings[m].add(n) elif 4 < lr == sp2 + 1: # pyrroles, furanes, etc try: - n = next(n for n in ring if sh(n) == 1) + n = next(n for n in ring if atoms[n].hybridization == 1) except StopIteration: # exotic, just skip continue - an = atoms[n].atomic_number - if (c := charges[n]) == -1: - if an != 6 or lr != 5: # skip any but ferrocene + if (a := atoms[n]).charge == -1: + if a != C or lr != 5: # skip any but ferrocene continue - elif c: # skip any charged + elif a.charge: # skip any charged continue elif lr == 7: # skip electron-rich 7-membered rings - if an != 5: # not B? + if a != 5: # not B? 
continue # below lr == 5 or 6 only - elif an in (8, 16, 34): # O, S, Se + elif a in (O, S, Se): if len(bonds[n]) != 2: # like CS1(C)C=CC=C1 continue - elif an == 7: + elif a == N: if (b := len(bonds[n])) > 3: # extra check for invalid N(IV) continue elif fix_tautomers and lr == 6 and b == 2: donors.append(n) - elif an in (5, 15): # B, P + elif a in (B, P): if len(bonds[n]) > 3: continue else: # only B, [C-], N, O, P, S, Se @@ -129,8 +119,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: return False # check out-of-ring double bonds - double_bonded = {n for n in rings if any(m not in rings[n] and b.order == 2 - for m, b in bonds[n].items())} + double_bonded = {n for n in rings if any(m not in rings[n] and b == 2 for m, b in bonds[n].items())} # fix_tautomers if fix_tautomers and acceptors and donors: @@ -149,8 +138,8 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: acceptors.discard(current) pyrroles.discard(start) pyrroles.add(current) - hydrogens[current] = 1 - hydrogens[start] = 0 + atoms[current]._implicit_hydrogens = 1 + atoms[start]._implicit_hydrogens = 0 break else: continue @@ -159,13 +148,15 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: seen.add(current) new_order = 1 if order == 2 else 2 stack.extend((current, n, depth, new_order) for n in rings[current] if - n not in seen and n not in double_bonded and bonds[current][n].order == order) + n not in seen and n not in double_bonded and bonds[current][n] == order) else: # path not found continue for n, m, o in path: - bonds[n][m]._Bond__order = o # noqa + bonds[n][m]._order = o if not acceptors: break + self.flush_cache(keep_sssr=True, keep_components=True) + self.calc_labels() if double_bonded: # delete quinones for n in double_bonded: @@ -205,27 +196,29 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: for ring in tetracycles: if seen.issuperset(ring): n, *_, m = ring - bonds[n][m]._Bond__order = 1 # noqa + 
bonds[n][m]._order = 1 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 1 # noqa + bonds[n][m]._order = 1 for ring in rings: n, *_, m = ring - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 - self.flush_cache() + self.flush_cache(keep_sssr=True, keep_components=True) + self.calc_labels() for ring in freaks: # aromatize rule based for q in freak_rules: if next(q.get_mapping(self, searching_scope=ring, automorphism_filter=False), None): n, *_, m = ring - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 for n, m in zip(ring, ring[1:]): - bonds[n][m]._Bond__order = 4 # noqa + bonds[n][m]._order = 4 break if freaks: - self.flush_cache() # flush again + self.flush_cache(keep_sssr=True, keep_components=True) # flush again + self.calc_labels() self.fix_stereo() # check if any stereo centers vanished. return True diff --git a/chython/algorithms/calculate2d/__init__.py b/chython/algorithms/calculate2d/__init__.py index bf3a1af1..f0c35a13 100644 --- a/chython/algorithms/calculate2d/__init__.py +++ b/chython/algorithms/calculate2d/__init__.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov -# Copyright 2019, 2020 Dinar Batyrshin +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,197 +16,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from math import sqrt -from random import random -from typing import TYPE_CHECKING, Union -from ...exceptions import ImplementationError - - -try: - from importlib.resources import files -except ImportError: # python3.8 - from importlib_resources import files - - -if TYPE_CHECKING: - from chython import ReactionContainer, MoleculeContainer - -try: - from py_mini_racer import MiniRacer, JSEvalException - - ctx = MiniRacer() - ctx.eval('const self = this') - ctx.eval(files(__package__).joinpath('clean2d.js').read_text()) -except RuntimeError: - ctx = None - - -class Calculate2DMolecule: - __slots__ = () - - def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): - """ - Calculate 2d layout of graph. https://pubs.acs.org/doi/10.1021/acs.jcim.7b00425 JS implementation used. - """ - if ctx is None: - raise ImportError('py_mini_racer is not installed or broken') - plane = {} - entry = iter(sorted(self, key=lambda n: len(self._bonds[n]))) - for _ in range(min(5, len(self))): - smiles, order = self.__clean2d_prepare(next(entry)) - try: - xy = ctx.call('$.clean2d', smiles) - except JSEvalException: - continue - break - else: - raise ImplementationError - - shift_x, shift_y = xy[0] - for n, (x, y) in zip(order, xy): - plane[n] = (x - shift_x, shift_y - y) - - bonds = [] - for n, m, _ in self.bonds(): - xn, yn = plane[n] - xm, ym = plane[m] - bonds.append(sqrt((xm - xn) ** 2 + (ym - yn) ** 2)) - if bonds: - bond_reduce = sum(bonds) / len(bonds) / .825 - else: - bond_reduce = 1. - - self_plane = self._plane - for n, (x, y) in plane.items(): - self_plane[n] = (x / bond_reduce, y / bond_reduce) - - if self.connected_components_count > 1: - shift_x = 0. 
- for c in self.connected_components: - shift_x = self._fix_plane_mean(shift_x, component=c) + .9 - self.__dict__.pop('__cached_method__repr_svg_', None) - - def _fix_plane_mean(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - plane = self._plane - if component is None: - component = plane - - left_atom = min(component, key=lambda x: plane[x][0]) - right_atom = max(component, key=lambda x: plane[x][0]) - - min_x = plane[left_atom][0] - shift_x - if len(self._atoms[left_atom].atomic_symbol) == 2: - min_x -= .2 - - max_x = plane[right_atom][0] - min_x - min_y = min(plane[x][1] for x in component) - max_y = max(plane[x][1] for x in component) - mean_y = (max_y + min_y) / 2 - shift_y - for n in component: - x, y = plane[n] - plane[n] = (x - min_x, y - mean_y) - - if -.18 <= plane[right_atom][1] <= .18: - factor = self._hydrogens[right_atom] - if factor == 1: - max_x += .15 - elif factor: - max_x += .25 - return max_x - - def _fix_plane_min(self: 'MoleculeContainer', shift_x: float, shift_y=0., component=None) -> float: - plane = self._plane - if component is None: - component = plane - - right_atom = max(component, key=lambda x: plane[x][0]) - min_x = min(plane[x][0] for x in component) - shift_x - max_x = plane[right_atom][0] - min_x - min_y = min(plane[x][1] for x in component) - shift_y - - for n in component: - x, y = plane[n] - plane[n] = (x - min_x, y - min_y) - - if shift_y - .18 <= plane[right_atom][1] <= shift_y + .18: - factor = self._hydrogens[right_atom] - if factor == 1: - max_x += .15 - elif factor: - max_x += .25 - return max_x - - def __clean2d_prepare(self: 'MoleculeContainer', entry): - hydrogens = self._hydrogens - charges = self._charges - allenes_stereo = self._allenes_stereo - atoms_stereo = self._atoms_stereo - self._charges = self._hydrogens = {n: 0 for n in hydrogens} - self._atoms_stereo = self._allenes_stereo = {} - w = {n: random() for n in hydrogens} - w[entry] = -1 - try: - smiles, order = 
self._smiles(w.__getitem__, random=True, _return_order=True) - finally: - self._hydrogens = hydrogens - self._charges = charges - self._allenes_stereo = allenes_stereo - self._atoms_stereo = atoms_stereo - return ''.join(smiles).replace('~', '-'), order - - -class Calculate2DReaction: - __slots__ = () - - def clean2d(self: 'ReactionContainer'): - """ - Recalculate 2d coordinates - """ - for m in self.molecules(): - m.clean2d() - self.fix_positions() - - def fix_positions(self: 'ReactionContainer'): - """ - Fix coordinates of molecules in reaction - """ - shift_x = 0 - reactants = self.reactants - amount = len(reactants) - 1 - signs = [] - for m in reactants: - max_x = m._fix_plane_mean(shift_x) - if amount: - max_x += .2 - signs.append(max_x) - amount -= 1 - shift_x = max_x + 1 - arrow_min = shift_x - - if self.reagents: - shift_x += .4 - for m in self.reagents: - max_x = m._fix_plane_min(shift_x, .5) - shift_x = max_x + 1 - shift_x += .4 - if shift_x - arrow_min < 3: - shift_x = arrow_min + 3 - else: - shift_x += 3 - arrow_max = shift_x - 1 - - products = self.products - amount = len(products) - 1 - for m in products: - max_x = m._fix_plane_mean(shift_x) - if amount: - max_x += .2 - signs.append(max_x) - amount -= 1 - shift_x = max_x + 1 - self._arrow = (arrow_min, arrow_max) - self._signs = tuple(signs) - self.flush_cache() +from .molecule import * +from .reaction import * __all__ = ['Calculate2DMolecule', 'Calculate2DReaction'] diff --git a/chython/algorithms/calculate2d/molecule.py b/chython/algorithms/calculate2d/molecule.py new file mode 100644 index 00000000..cca6a137 --- /dev/null +++ b/chython/algorithms/calculate2d/molecule.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2025 Ramil Nugmanov +# Copyright 2019, 2020 Dinar Batyrshin +# This file is part of chython. 
+# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from math import sqrt +from random import random +from typing import TYPE_CHECKING, Union, Dict +from ...exceptions import ImplementationError +from ...periodictable.base.vector import Vector + + +try: + from importlib.resources import files +except ImportError: # python3.8 + from importlib_resources import files + + +if TYPE_CHECKING: + from chython import MoleculeContainer + +try: + from py_mini_racer import MiniRacer, JSEvalException + + ctx = MiniRacer() + ctx.eval('const self = this') + ctx.eval(files(__package__).joinpath('clean2d.js').read_text()) +except RuntimeError: + ctx = None + + +class Calculate2DMolecule: + __slots__ = () + _atoms: Dict[int, 'Element'] + _bonds: Dict[int, Dict[int, 'Bond']] + + def clean2d(self: Union['MoleculeContainer', 'Calculate2DMolecule']): + """ + Calculate 2d layout of graph. https://pubs.acs.org/doi/10.1021/acs.jcim.7b00425 JS implementation used. 
+ """ + if ctx is None: + raise ImportError('py_mini_racer is not installed or broken') + plane = {} + entry = iter(sorted(self, key=lambda n: len(self._bonds[n]))) + for _ in range(min(5, len(self))): + smiles, order = self.__clean2d_prepare(next(entry)) + try: + xy = ctx.call('$.clean2d', smiles) + except JSEvalException: + continue + break + else: + raise ImplementationError + + shift_x, shift_y = xy[0] + for n, (x, y) in zip(order, xy): + plane[n] = (x - shift_x, shift_y - y) + + bonds = [] + for n, m, _ in self.bonds(): + xn, yn = plane[n] + xm, ym = plane[m] + bonds.append(sqrt((xm - xn) ** 2 + (ym - yn) ** 2)) + if bonds: + bond_reduce = sum(bonds) / len(bonds) / .825 + else: + bond_reduce = 1. + + atoms = self._atoms + for n, (x, y) in plane.items(): + atoms[n].xy = (x / bond_reduce, y / bond_reduce) + + if self.connected_components_count > 1: + shift_x = 0. + for c in self.connected_components: + shift_x = self._fix_plane_mean(shift_x, component=c) + .9 + self.__dict__.pop('__cached_method__repr_svg_', None) + + def _fix_plane_mean(self, shift_x: float, shift_y=0., component=None) -> float: + atoms = self._atoms + if component is None: + component = atoms + + left_atom = atoms[min(component, key=lambda x: atoms[x].x)] + right_atom = atoms[max(component, key=lambda x: atoms[x].x)] + + min_x = left_atom.x - shift_x + if len(left_atom.atomic_symbol) == 2: + min_x -= .2 + + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) + max_y = max(atoms[x].y for x in component) + mean_y = (max_y + min_y) / 2 - shift_y + delta = Vector(min_x, mean_y) + for n in component: + atoms[n].xy -= delta + + if -.18 <= right_atom.y <= .18: + factor = right_atom.implicit_hydrogens + if factor == 1: + max_x += .15 + elif factor: + max_x += .25 + return max_x + + def _fix_plane_min(self, shift_x: float, shift_y=0., component=None) -> float: + atoms = self._atoms + if component is None: + component = atoms + + right_atom = atoms[max(component, key=lambda x: 
atoms[x].x)] + min_x = min(atoms[x].x for x in component) - shift_x + max_x = right_atom.x - min_x + min_y = min(atoms[x].y for x in component) - shift_y + delta = Vector(min_x, min_y) + for n in component: + atoms[n].xy -= delta + + if shift_y - .18 <= right_atom.y <= shift_y + .18: + factor = right_atom.implicit_hydrogens + if factor == 1: + max_x += .15 + elif factor: + max_x += .25 + return max_x + + def __clean2d_prepare(self: 'MoleculeContainer', entry): + w = {n: random() for n in self._atoms} + w[entry] = -1 + smiles, order = self._smiles(w.__getitem__, random=True, charges=False, stereo=False, _return_order=True) + return ''.join(smiles).replace('~', '-'), order + + +__all__ = ['Calculate2DMolecule'] diff --git a/chython/algorithms/calculate2d/reaction.py b/chython/algorithms/calculate2d/reaction.py new file mode 100644 index 00000000..536643aa --- /dev/null +++ b/chython/algorithms/calculate2d/reaction.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from chython import ReactionContainer + + +class Calculate2DReaction: + __slots__ = () + + def clean2d(self: 'ReactionContainer'): + """ + Recalculate 2d coordinates + """ + for m in self.molecules(): + m.clean2d() + self.fix_positions() + + def fix_positions(self: 'ReactionContainer'): + """ + Fix coordinates of molecules in reaction + """ + shift_x = 0 + reactants = self.reactants + amount = len(reactants) - 1 + signs = [] + for m in reactants: + max_x = m._fix_plane_mean(shift_x) + if amount: + max_x += .2 + signs.append(max_x) + amount -= 1 + shift_x = max_x + 1 + arrow_min = shift_x + + if self.reagents: + shift_x += .4 + for m in self.reagents: + max_x = m._fix_plane_min(shift_x, .5) + shift_x = max_x + 1 + shift_x += .4 + if shift_x - arrow_min < 3: + shift_x = arrow_min + 3 + else: + shift_x += 3 + arrow_max = shift_x - 1 + + products = self.products + amount = len(products) - 1 + for m in products: + max_x = m._fix_plane_mean(shift_x) + if amount: + max_x += .2 + signs.append(max_x) + amount -= 1 + shift_x = max_x + 1 + self._arrow = (arrow_min, arrow_max) + self._signs = tuple(signs) + self.flush_cache(keep_molecule_cache=True) + + +__all__ = ['Calculate2DReaction'] diff --git a/chython/algorithms/depict.py b/chython/algorithms/depict.py index 4eab3f82..5b00406b 100644 --- a/chython/algorithms/depict.py +++ b/chython/algorithms/depict.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2022 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # Copyright 2019-2020 Dinar Batyrshin # This file is part of chython. # @@ -206,17 +206,16 @@ def depict(self: Union['MoleculeContainer', 'DepictMolecule'], *, width=None, he :param clean2d: calculate coordinates if necessary. 
""" uid = str(uuid4()) - values = self._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in self.atoms()) + max_x = max(a.x for _, a in self.atoms()) + min_y = min(a.y for _, a in self.atoms()) + max_y = max(a.y for _, a in self.atoms()) if clean2d and len(self) > 1 and max_y - min_y < .01 and max_x - min_x < 0.01: self.clean2d() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in self.atoms()) + max_x = max(a.x for _, a in self.atoms()) + min_y = min(a.y for _, a in self.atoms()) + max_y = max(a.y for _, a in self.atoms()) bonds = self.__render_bonds() atoms, define, masks = self.__render_atoms(uid) @@ -247,8 +246,8 @@ def _repr_svg_(self): return self.depict() def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): + atoms = self._atoms svg = [] - plane = self._plane double_space = _render_config['double_space'] triple_space = _render_config['triple_space'] wedge_space = _render_config['wedge_space'] @@ -260,8 +259,8 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): wedge[n].add(m) wedge[m].add(n) - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy ny, my = -ny, -my dx, dy = _rotate_vector(0, wedge_space, mx - nx, ny - my) @@ -271,17 +270,16 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): for n, m, bond in self.bonds(): if m in wedge[n]: continue - order = bond.order - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy ny, my = -ny, -my - if order in (1, 4): + if bond in (1, 4): svg.append(f' ') - elif order == 2: + elif bond == 2: dx, dy = _rotate_vector(0, double_space, mx - nx, ny - my) svg.append(f' ') svg.append(f' ') - elif order == 3: + elif bond == 3: dx, dy = 
_rotate_vector(0, triple_space, mx - nx, ny - my) svg.append(f' ') svg.append(f' ') @@ -291,18 +289,18 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): f'stroke-dasharray="{dash1:.2f} {dash2:.2f}"/>') for ring in self.aromatic_rings: - cx = sum(plane[n][0] for n in ring) / len(ring) - cy = sum(plane[n][1] for n in ring) / len(ring) + cx = sum(atoms[n].x for n in ring) / len(ring) + cy = sum(atoms[n].y for n in ring) / len(ring) for n, m in zip(ring, ring[1:]): - nx, ny = plane[n] - mx, my = plane[m] + nx, ny = atoms[n].xy + mx, my = atoms[m].xy aromatic = _render_aromatic_bond(nx, ny, mx, my, cx, cy) if aromatic: svg.append(aromatic) - nx, ny = plane[ring[-1]] - mx, my = plane[ring[0]] + nx, ny = atoms[ring[-1]].xy + mx, my = atoms[ring[0]].xy aromatic = _render_aromatic_bond(nx, ny, mx, my, cx, cy) if aromatic: svg.append(aromatic) @@ -310,10 +308,6 @@ def __render_bonds(self: Union['MoleculeContainer', 'DepictMolecule']): def __render_atoms(self: 'MoleculeContainer', uid): bonds = self._bonds - plane = self._plane - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens carbon = _render_config['carbon'] mapping = _render_config['mapping'] @@ -348,9 +342,6 @@ def __render_atoms(self: 'MoleculeContainer', uid): stroke_width_o = other_size * .1 stroke_width_m = mapping_size * .1 - # for cumulenes - cumulenes = {y for x in self._cumulenes(heteroatoms=True) if len(x) > 2 for y in x[1:-1]} - svg = [] maps = [] symbols = [] @@ -359,15 +350,15 @@ def __render_atoms(self: 'MoleculeContainer', uid): define = [] mask = [] - for n, atom in self._atoms.items(): - x, y = plane[n] - y = -y + for n, atom in self.atoms(): + x, y = atom.x, -atom.y symbol = atom.atomic_symbol - if not bonds[n] or symbol != 'C' or carbon or charges[n] or radicals[n] or atom.isotope or n in cumulenes: - if charges[n]: + if (symbol != 'C' or atom.charge or atom.is_radical or atom.isotope or carbon + or not bonds[n] or sum(b == 2 for b in 
bonds[n].values()) == 2): + if atom.charge: others.append(f' ' - f'{_render_charge[charges[n]]}{"↑" if radicals[n] else ""}') - elif radicals[n]: + f'{_render_charge[atom.charge]}{"↑" if atom.is_radical else ""}') + elif atom.is_radical: others.append(f' ↑') if atom.isotope: others.append(f' ') - h = hydrogens[n] + h = atom.implicit_hydrogens if h == 1: h = 'H' elif h: @@ -463,11 +454,10 @@ def depict(self: 'ReactionContainer', *, width=None, height=None, clean2d: bool if clean2d: for m in self.molecules(): if len(m) > 1: - values = m._plane.values() # noqa - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() self.fix_positions() diff --git a/chython/algorithms/fingerprints/__init__.py b/chython/algorithms/fingerprints/__init__.py index 0f6febf1..ec2121fa 100644 --- a/chython/algorithms/fingerprints/__init__.py +++ b/chython/algorithms/fingerprints/__init__.py @@ -32,7 +32,7 @@ class Fingerprints(LinearFingerprint, MorganFingerprint): @property def _atom_identifiers(self: 'MoleculeContainer'): return {idx: hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.is_radical)) - for idx, atom in self._atoms.items()} + for idx, atom in self.atoms()} class FingerprintsCGR(LinearFingerprint, MorganFingerprint): diff --git a/chython/algorithms/fingerprints/test/__init__.py b/chython/algorithms/fingerprints/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/algorithms/fingerprints/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. 
+# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# diff --git a/chython/algorithms/fingerprints/test/test_linear.py b/chython/algorithms/fingerprints/test/test_linear.py new file mode 100644 index 00000000..d0accfc8 --- /dev/null +++ b/chython/algorithms/fingerprints/test/test_linear.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +import numpy as np +from chython import smiles +from pytest import mark, raises + + +def test_basic(): + # Test basic fingerprint generation + mol = smiles('CCO') + fp = mol.linear_fingerprint(min_radius=1, max_radius=2, length=1024) + + # Test array properties + assert isinstance(fp, np.ndarray) + assert fp.dtype == np.uint8 + assert fp.shape == (1024,) + + # Test binary nature + assert set(np.unique(fp)).issubset({0, 1}) + + # Test different lengths + fp = mol.linear_fingerprint(length=2048) + assert isinstance(fp, np.ndarray) + assert fp.dtype == np.uint8 + assert fp.shape == (2048,) + + +@mark.parametrize('radius', [(0, 1), (1, 0), (-1, 2), (2, 1)]) +def test_invalid_radius(radius): + mol = smiles('CCO') + min_r, max_r = radius + with raises(AssertionError): + mol.morgan_fingerprint(min_radius=min_r, max_radius=max_r) + + +def test_consistency(): + # Test that fingerprints are consistent for the same molecule + fp1 = smiles('CCO').linear_fingerprint() + fp2 = smiles('OCC').linear_fingerprint() + + # Test exact equality of arrays + assert np.array_equal(fp1, fp2) + + # Test different molecules give different fingerprints) + fp3 = smiles('CCC').linear_fingerprint() + assert not np.array_equal(fp1, fp3) + + +def test_parameters(): + mol = smiles('CCO') + + # Test different radius parameters + fp1 = mol.linear_fingerprint(min_radius=1, max_radius=2) + fp2 = mol.linear_fingerprint(min_radius=1, max_radius=3) + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) + + # Test number of active bits + fp3 = mol.linear_fingerprint(number_active_bits=2) + fp4 = mol.linear_fingerprint(number_active_bits=3) + assert not np.array_equal(fp3, fp4) + assert np.array_equal(fp3 & fp4, fp3) + + +def test_bit_pairs(): + # Test the number_bit_pairs parameter + mol = smiles('CCCCCCCCCCCCCCCCCCCCCCCC') # molecule with multiple similar fragments + + # Compare different number_bit_pairs settings + fp1 = mol.linear_fingerprint(number_bit_pairs=2) + fp2 = 
mol.linear_fingerprint(number_bit_pairs=3) + + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) + + +def test_edge_cases(): + fp1 = smiles('C').linear_fingerprint() + assert fp1.sum() == 2 + + fp1 = smiles('CC').linear_fingerprint() + fp2 = smiles('CC.CC').linear_fingerprint() + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) + + +def test_comparison(): + # Test fingerprint comparison between similar molecules + mol1 = smiles('CCO') + mol2 = smiles('CCC') + mol3 = smiles('CCCO') + + fp1 = mol1.linear_fingerprint() + fp2 = mol2.linear_fingerprint() + fp3 = mol3.linear_fingerprint() + + # Calculate Tanimoto similarities + def tanimoto(a, b): + intersection = np.sum(np.logical_and(a, b)) + union = np.sum(np.logical_or(a, b)) + return intersection / union if union > 0 else 0.0 + + # Similar molecules should have higher similarity + sim12 = tanimoto(fp1, fp2) + sim13 = tanimoto(fp1, fp3) + sim23 = tanimoto(fp2, fp3) + + assert 0 <= sim12 <= 1 + assert 0 <= sim13 <= 1 + assert 0 <= sim23 <= 1 diff --git a/chython/algorithms/fingerprints/test/test_morgan.py b/chython/algorithms/fingerprints/test/test_morgan.py new file mode 100644 index 00000000..5b32e21f --- /dev/null +++ b/chython/algorithms/fingerprints/test/test_morgan.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from chython import smiles +import numpy as np +from pytest import mark, raises + + +def test_basic(): + # Test basic fingerprint generation + mol = smiles('CCO') + fp = mol.morgan_fingerprint(min_radius=1, max_radius=2, length=1024) + + # Test array properties + assert isinstance(fp, np.ndarray) + assert fp.dtype == np.uint8 + assert fp.shape == (1024,) + + # Test binary nature + assert set(np.unique(fp)).issubset({0, 1}) + + # Test different lengths + fp = mol.morgan_fingerprint(length=2048) + assert isinstance(fp, np.ndarray) + assert fp.dtype == np.uint8 + assert fp.shape == (2048,) + + +@mark.parametrize('radius', [(0, 1), (1, 0), (-1, 2), (2, 1)]) +def test_invalid_radius(radius): + mol = smiles('CCO') + min_r, max_r = radius + with raises(AssertionError): + mol.morgan_fingerprint(min_radius=min_r, max_radius=max_r) + + +def test_consistency(): + # Test that fingerprints are consistent for the same molecule + fp1 = smiles('CCO').morgan_fingerprint() + fp2 = smiles('OCC').morgan_fingerprint() + + # Test exact equality of arrays + assert np.array_equal(fp1, fp2) + + # Test different molecules give different fingerprints) + fp3 = smiles('CCC').morgan_fingerprint() + assert not np.array_equal(fp1, fp3) + + +def test_parameters(): + mol = smiles('CCO') + + # Test different radius parameters + fp1 = mol.morgan_fingerprint(min_radius=1, max_radius=2) + fp2 = mol.morgan_fingerprint(min_radius=1, max_radius=3) + assert not np.array_equal(fp1, fp2) + assert np.array_equal(fp1 & fp2, fp1) + + # Test number of active bits + fp3 = mol.morgan_fingerprint(number_active_bits=2) + fp4 = mol.morgan_fingerprint(number_active_bits=3) + assert not np.array_equal(fp3, fp4) + assert np.array_equal(fp3 & fp4, fp3) diff --git a/chython/algorithms/isomorphism.py b/chython/algorithms/isomorphism.py index 76791e70..914a68d9 100644 --- 
a/chython/algorithms/isomorphism.py +++ b/chython/algorithms/isomorphism.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2024 Ramil Nugmanov +# Copyright 2018-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,15 +19,23 @@ from array import array from collections import defaultdict, deque from functools import cached_property, partial +from io import BytesIO from itertools import permutations +from struct import Struct from typing import Any, Collection, Dict, Iterator, Optional, TYPE_CHECKING, Union from .._functions import lazy_product -from ..periodictable.element import Element, Query, AnyElement, AnyMetal, ListElement +from ..periodictable import Element, Query, AnyElement, AnyMetal, ListElement, QueryElement, ExtendedQuery if TYPE_CHECKING: from chython.containers.graph import Graph - from chython.containers import MoleculeContainer, QueryContainer + from chython.containers import MoleculeContainer + + +header_struct = Struct('I') +m_atom_struct = Struct('QQQQIII') +q_atom_struct = Struct('QQQQIIIII') +bond_struct = Struct('QI') class Isomorphism: @@ -49,14 +57,6 @@ def __gt__(self, other): def __ge__(self, other): return other.is_substructure(self) - def __contains__(self: 'Graph', other: Union[Element, Query, str]): - """ - Atom in Structure test. - """ - if isinstance(other, str): - return any(other == x.atomic_symbol for x in self._atoms.values()) - return any(other == x for x in self._atoms.values()) - def is_substructure(self, other, /) -> bool: """ Test self is substructure of other @@ -79,23 +79,7 @@ def is_equal(self, other, /) -> bool: return False return True - def is_automorphic(self): - """ - Test for automorphism symmetry of graph. 
- """ - try: - next(self.get_automorphism_mapping()) - except StopIteration: - return False - return True - - def get_automorphism_mapping(self: 'Graph') -> Iterator[Dict[int, int]]: - """ - Iterator of all possible automorphism mappings. - """ - return _get_automorphism_mapping(self.atoms_order, self._bonds) - - def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=None, + def _get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter=True, searching_scope=None, components=None, get_mapping=None) -> Iterator[Dict[int, int]]: if components is None: # ad-hoc for QueryContainer components, closures = self._compiled_query @@ -106,7 +90,7 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No seen = set() if len(components) == 1: - for candidate in other._connected_components: + for candidate in other.connected_components: if searching_scope: candidate = searching_scope.intersection(candidate) if not candidate: @@ -119,7 +103,7 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No seen.add(atoms) yield mapping else: - for candidates in permutations(other._connected_components, len(components)): + for candidates in permutations(other.connected_components, len(components)): mappers = [] for component, candidate in zip(components, candidates): if searching_scope: @@ -141,29 +125,81 @@ def _get_mapping(self, other, /, *, automorphism_filter=True, searching_scope=No @cached_property def _compiled_query(self: 'Graph'): - components, closures = _compile_query(self._atoms, self._bonds) - if self.connected_components_count > 1: - order = {x: n for n, c in enumerate(self.connected_components) for x in c} - components.sort(key=lambda x: order[x[0][0]]) - return components, closures + return _compile_query(self._atoms, self._bonds) class MoleculeIsomorphism(Isomorphism): + __slots__ = () + + def __contains__(self: 'MoleculeContainer', other: Union[Element, Query, str]): + """ + Atom in 
Structure test. + """ + if isinstance(other, str): + return any(other == a.atomic_symbol for _, a in self.atoms()) + return any(other == a for _, a in self.atoms()) + + def is_automorphic(self): + """ + Test for automorphism symmetry of graph. + """ + try: + next(self.get_automorphism_mapping()) + except StopIteration: + return False + return True + + def get_automorphism_mapping(self: 'MoleculeContainer') -> Iterator[Dict[int, int]]: + """ + Iterator of all possible automorphism mappings. + """ + return _get_automorphism_mapping(self._chiral_morgan, self._bonds) + def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, - searching_scope: Optional[Collection[int]] = None): + searching_scope: Optional[Collection[int]] = None, match_stereo: bool = False): """ Get self to other Molecule substructure mapping generator. :param other: Molecule :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. + :param match_stereo: test stereo labels matches. slow algorithm, thus disabled by default. + """ + if not isinstance(other, MoleculeIsomorphism): + raise TypeError('MoleculeContainer expected') + + for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter or match_stereo, + searching_scope=searching_scope): + if match_stereo: + sub = other.substructure(mapping.values()) # extract matched subgraph + fm = self.get_fast_mapping(sub) + if not fm: # check mor matching with stereo labels too + continue + yield fm + if not automorphism_filter: + for auto in sub.get_automorphism_mapping(): # enumerate all possible automorphisms + yield {n: auto[m] for n, m in fm.items()} + else: + yield mapping + + def get_fast_mapping(self, other: 'MoleculeContainer') -> Optional[Dict[int, int]]: + """ + Get self to other fast (suboptimal) structure mapping. + Only one possible atoms mapping returned. + Effective only for big molecules. 
""" if isinstance(other, MoleculeIsomorphism): - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) + if len(self) != len(other): + return + so = self.smiles_atoms_order + oo = other.smiles_atoms_order + if self != other: + return + return dict(zip(so, oo)) raise TypeError('MoleculeContainer expected') @cached_property - def _cython_compiled_structure(self): + def _cython_compiled_structure(self: 'MoleculeContainer'): # long I: # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit @@ -185,15 +221,6 @@ def _cython_compiled_structure(self): # long IV: # ring_sizes: not-in-ring bit, 3-atom ring, 4-...., 65-atom ring - from ..files._mdl.mol import common_isotopes - - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - neighbors = self.neighbors - heteroatoms = self.heteroatoms - rings_sizes = self.atoms_rings_sizes - hybridization = self.hybridization mapping = {} numbers = [] @@ -201,10 +228,10 @@ def _cython_compiled_structure(self): bits2 = [] bits3 = [] bits4 = [] - for i, (n, a) in enumerate(self._atoms.items()): + for i, (n, a) in enumerate(self.atoms()): mapping[n] = i numbers.append(n) - v2 = 1 << (hybridization(n) - 1) + v2 = 1 << (a.hybridization - 1) if (an := a.atomic_number) > 56: if an > 116: # Ts, Og an = 116 @@ -214,24 +241,24 @@ def _cython_compiled_structure(self): v1 = 1 << (57 - an) if a.isotope: - v3 = 1 << (a.isotope - common_isotopes[a.atomic_symbol] + 54) - if radicals[n]: + v3 = 1 << (a.isotope - a.mdl_isotope + 54) + if a.is_radical: v3 |= 0x200000000000 else: v3 |= 0x100000000000 - elif radicals[n]: + elif a.is_radical: v3 = 0x8000200000000000 else: v3 = 0x8000100000000000 - v3 |= 1 << (charges[n] + 39) - v3 |= 1 << ((hydrogens[n] or 0) + 30) - v3 |= 1 << (neighbors(n) + 15) - v3 |= 1 << heteroatoms(n) + v3 |= 1 << (a.charge + 39) + v3 |= 1 << ((a.implicit_hydrogens or 0) + 30) + v3 |= 1 << (a.neighbors + 15) + v3 |= 1 << 
a.heteroatoms - if n in rings_sizes: + if a.ring_sizes: v4 = 0 - for r in rings_sizes[n]: + for r in a.ring_sizes: if r > 65: # big rings not supported continue v4 |= 1 << (65 - r) @@ -256,15 +283,14 @@ def _cython_compiled_structure(self): for j, (m, b) in enumerate(ms.items(), start): indices[j] = x = mapping[m] v = bits1[x] - o = b.order - if o == 1: + if b == 1: v |= 0x0800000000000000 - elif o == 4: - v |= 0x4000000000000000 - elif o == 2: + elif b == 2: v |= 0x1000000000000000 - elif o == 3: + elif b == 3: v |= 0x2000000000000000 + elif b == 4: + v |= 0x4000000000000000 else: v |= 0x8000000000000000 v |= 0x0400000000000000 if b.in_ring else 0x0200000000000000 @@ -272,42 +298,92 @@ def _cython_compiled_structure(self): start += len(ms) o_to[i] = start - return (array('L', numbers), array('Q', bits1), array('Q', bits2), array('Q', bits3), array('Q', bits4), - array('Q', bonds), array('I', o_from), array('I', o_to), array('I', indices)) + buffer = BytesIO() + buffer.write(header_struct.pack(len(numbers))) + for x in zip(bits1, bits2, bits3, bits4, o_from, o_to, numbers): + buffer.write(m_atom_struct.pack(*x)) + for x in zip(bonds, indices): + buffer.write(bond_struct.pack(*x)) + return buffer.getvalue() class QueryIsomorphism(Isomorphism): - def get_mapping(self, other: Union['MoleculeContainer', 'QueryContainer'], /, *, automorphism_filter: bool = True, + __slots__ = () + + def get_mapping(self, other: 'MoleculeContainer', /, *, automorphism_filter: bool = True, searching_scope: Optional[Collection[int]] = None, _cython=True): """ - Get self to other Molecule or Query substructure mapping generator. + Get Query to Molecule substructure mapping generator. - :param other: Molecule or Query + :param other: Molecule :param automorphism_filter: Skip matches to the same atoms. :param searching_scope: substructure atoms list to localize isomorphism. """ # _cython - by default cython implementation enabled. 
# disable it by overriding method if Query Atoms or Containers logic changed. # Lv, Ts and Og in cython optimized mode treated as equal. - if isinstance(other, QueryIsomorphism): - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope) - elif isinstance(other, MoleculeIsomorphism): - if _cython: - try: # windows? ;) - from ._isomorphism import get_mapping as _cython_get_mapping - except ImportError: - components = get_mapping = None - else: - components = self._cython_compiled_query # override to cython data + if not isinstance(other, MoleculeIsomorphism): + raise TypeError('MoleculeContainer expected') - def get_mapping(query, scope): - return _cython_get_mapping(*query, *other._cython_compiled_structure, - array('I', [n in scope for n in other])) - else: + if _cython: + try: # windows? ;) + from ._isomorphism import get_mapping as _cython_get_mapping + except ImportError: components = get_mapping = None - return self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, - components=components, get_mapping=get_mapping) - raise TypeError('MoleculeContainer or QueryContainer expected') + else: + components = self._cython_compiled_query # override to cython data + + def get_mapping(query, scope): + return _cython_get_mapping(query, other._cython_compiled_structure, + array('I', [n in scope for n in other])) + else: + components = get_mapping = None + + for mapping in self._get_mapping(other, automorphism_filter=automorphism_filter, searching_scope=searching_scope, + components=components, get_mapping=get_mapping): + reverse = None + # test stereo labels matches + for n, a in self.atoms(): + if not isinstance(a, ExtendedQuery) or a.stereo is None: + continue # non-chiral atom matches any atom. 
no need for checks + m = mapping[n] + if other.atom(m).stereo is None: # stereo in query should match only stereo atom + break # reject mapping + + if m in other.stereogenic_tetrahedrons: + if other._translate_tetrahedron_sign(m, [mapping[x] for x in self._bonds[n]]) != a.stereo: + break # stereo sign doesn't match + else: # allene case + if reverse is None: + reverse = {m: n for n, m in mapping.items()} + ot1, ot2 = other._stereo_allenes_terminals[m] # get terminal atoms + on1, om1, on2, om2 = other.stereogenic_allenes[m] # get neighbors + t1, t2 = reverse[ot1], reverse[ot2] + env = (reverse.get(on1), reverse.get(om1), reverse.get(on2), reverse.get(om2)) + n1 = mapping[next(x for x in self._bonds[t1] if x in env)] + m1 = mapping[next(x for x in self._bonds[t2] if x in env)] + if other._translate_allene_sign(m, n1, m1) != a.stereo: + break + else: + for n, m, b in self.bonds(): + if b.stereo is None: + continue + on, om = mapping[n], mapping[m] + if other.bond(on, om).stereo is None: # chiral query bond matches only chiral molecule bond + break + if reverse is None: + reverse = {m: n for n, m in mapping.items()} + + ot1, ot2 = ots = other._stereo_cis_trans_terminals[on] # get terminal atoms + on1, om1, on2, om2 = other.stereogenic_cis_trans[ots] # get neighbors + t1, t2 = reverse[ot1], reverse[ot2] + env = (reverse.get(on1), reverse.get(om1), reverse.get(on2), reverse.get(om2)) + n1 = mapping[next(x for x in self._bonds[t1] if x in env)] + m1 = mapping[next(x for x in self._bonds[t2] if x in env)] + if other._translate_cis_trans_sign(ot1, ot2, n1, m1) != b.stereo: + break + else: + yield mapping @cached_property def _cython_compiled_query(self): @@ -337,7 +413,6 @@ def _cython_compiled_query(self): # padding: 1 bit # bond: single, double, triple, aromatic, special = 5 bit # bond in ring: 2 bit - from ..files._mdl.mol import common_isotopes _components, _closures = self._compiled_query components = [] @@ -361,7 +436,7 @@ def _cython_compiled_query(self): else: if 
isinstance(a, ListElement): v1 = v2 = 0 - for n in a._numbers: + for n in a.atomic_numbers: if n > 56: if n > 116: # Ts, Og n = 116 @@ -377,8 +452,8 @@ def _cython_compiled_query(self): else: v1 = 1 << (57 - n) v2 = 0 - if a.isotope: - v3 = 1 << (a.isotope - common_isotopes[a.atomic_symbol] + 54) + if isinstance(a, QueryElement) and a.isotope: + v3 = 1 << (a.isotope - a.mdl_isotope + 54) if a.is_radical: v3 |= 0x200000000000 else: @@ -486,10 +561,16 @@ def _cython_compiled_query(self): indices[j] = mapping[m] start += len(ms) q_to[i] = start - components.append((array('L', [n for n, *_ in c]), array('I', [0] + [mapping[x] for _, x, *_ in c[1:]]), - array('Q', masks1), array('Q', masks2), array('Q', masks3), array('Q', masks4), - array('I', closures), array('I', q_from), array('I', q_to), - array('I', indices), array('Q', bonds))) + + back = [0] + [mapping[x] for _, x, *_ in c[1:]] + numbers = [n for n, *_ in c] + buffer = BytesIO() + buffer.write(header_struct.pack(len(numbers))) + for x in zip(masks1, masks2, masks3, masks4, back, closures, q_from, q_to, numbers): + buffer.write(q_atom_struct.pack(*x)) + for x in zip(bonds, indices): + buffer.write(bond_struct.pack(*x)) + components.append(buffer.getvalue()) return components @@ -498,8 +579,7 @@ def _get_automorphism_mapping(atoms: Dict[int, int], bonds: Dict[int, Dict[int, return # all atoms unique components, closures = _compile_query(atoms, bonds) - mappers = [_get_mapping(order, closures, atoms, bonds, {x for x, *_ in order}) - for order in components] + mappers = [_get_mapping(order, closures, atoms, bonds, {x for x, *_ in order}) for order in components] if len(mappers) == 1: for mapping in mappers[0]: if any(k != v for k, v in mapping.items()): diff --git a/chython/algorithms/mapping/attention.py b/chython/algorithms/mapping/attention.py index e8c75ff0..bc4e870a 100644 --- a/chython/algorithms/mapping/attention.py +++ b/chython/algorithms/mapping/attention.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# 
Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # @@ -33,6 +33,7 @@ class Attention: __slots__ = () + __class_cache__ = {} def reset_mapping(self: Union['ReactionContainer', 'Attention'], *, return_score: bool = False, multiplier=1.75, keep_reactants_numbering=False) -> Union[bool, float]: diff --git a/chython/algorithms/mapping/fixmapper.py b/chython/algorithms/mapping/fixmapper.py index 84768bdc..251eea95 100644 --- a/chython/algorithms/mapping/fixmapper.py +++ b/chython/algorithms/mapping/fixmapper.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -50,7 +50,7 @@ def fix_mapping(self: 'ReactionContainer', *, logging: bool = False) -> \ free_number = count(max(cgr) + 1) components = [(cgr.substructure(c), cgr.augmented_substructure(c, 2), # deep DEPENDS on rules! 
- set(c)) + c) for c in cgr.substructure(cgr.center_atoms).connected_components] r_atoms = ChainMap(*(x._atoms for x in self.reactants)) diff --git a/chython/algorithms/mcs.py b/chython/algorithms/mcs.py index 437d2dcf..8e1bf41b 100644 --- a/chython/algorithms/mcs.py +++ b/chython/algorithms/mcs.py @@ -97,10 +97,10 @@ def __get_product(self: 'molecule.MoleculeContainer', other: 'molecule.MoleculeC o_bonds = other._bonds s_equal = defaultdict(list) # equal self atoms - for n, atom in self._atoms.items(): + for n, atom in self.atoms(): s_equal[atom].append(n) p_equal = defaultdict(list) # equal other atoms - for n, atom in other._atoms.items(): + for n, atom in other.atoms(): p_equal[atom].append(n) full_product = {} diff --git a/chython/algorithms/morgan.py b/chython/algorithms/morgan.py index 659c50c8..8c8c1b30 100644 --- a/chython/algorithms/morgan.py +++ b/chython/algorithms/morgan.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -27,29 +27,27 @@ if TYPE_CHECKING: - from chython.containers.graph import Graph + from chython.containers import MoleculeContainer class Morgan: __slots__ = () @cached_property - def atoms_order(self: 'Graph') -> Dict[int, int]: + def atoms_order(self: 'MoleculeContainer') -> Dict[int, int]: """ Morgan like algorithm for graph nodes ordering :return: dict of atom-order pairs """ - atoms = self._atoms - if not atoms: # for empty containers + if not self: # for empty containers return {} - elif len(atoms) == 1: # optimize single atom containers - return dict.fromkeys(atoms, 1) - ring = self.ring_atoms - return _morgan({n: hash((hash(a), n in ring)) for n, a in atoms.items()}, self.int_adjacency) + elif len(self) == 1: # optimize single atom containers + return dict.fromkeys(self, 1) + return _morgan({n: hash(a) for n, a in self.atoms()}, self.int_adjacency) @cached_property - def int_adjacency(self: 'Graph') -> Dict[int, Dict[int, int]]: + def int_adjacency(self: 'MoleculeContainer') -> Dict[int, Dict[int, int]]: """ Adjacency with integer-coded bonds. """ diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 0b50b2a4..f7dc58de 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from CachedMethods import cached_args_method from collections import defaultdict, deque from functools import cached_property from itertools import combinations @@ -26,14 +25,14 @@ if TYPE_CHECKING: - from chython.containers.graph import Graph + from chython.containers import MoleculeContainer class Rings: __slots__ = () @cached_property - def sssr(self) -> Tuple[Tuple[int, ...], ...]: + def sssr(self) -> List[Tuple[int, ...]]: """ Smallest Set of Smallest Rings. Special bonds ignored. @@ -47,72 +46,25 @@ def sssr(self) -> Tuple[Tuple[int, ...], ...]: """ if self.rings_count: return _sssr(self.not_special_connectivity, self.rings_count) - return () + return [] @cached_property - def atoms_rings(self) -> Dict[int, Tuple[Tuple[int, ...]]]: + def atoms_rings(self) -> Dict[int, List[Tuple[int, ...]]]: """ - Dict of atoms rings which contains it. + A dictionary with atom numbers as keys and a list of tuples (representing SSSR rings) as values. """ rings = defaultdict(list) for r in self.sssr: for n in r: rings[n].append(r) - return {n: tuple(rs) for n, rs in rings.items()} + return dict(rings) @cached_property - def atoms_rings_sizes(self) -> Dict[int, Tuple[int, ...]]: + def atoms_rings_sizes(self) -> Dict[int, Set[int]]: """ - Sizes of rings containing atom. + Sizes of SSSR rings containing atom. """ - return {n: tuple(len(r) for r in rs) for n, rs in self.atoms_rings.items()} - - @cached_args_method - def is_ring_bond(self: 'Graph', n: int, m: int, /) -> bool: - """ - Check is bond in any ring. - """ - self.bond(n, m) # check if bond exists - try: - return not set(self.atoms_rings[n]).isdisjoint(self.atoms_rings[m]) - except KeyError: - return False - - @cached_property - def ring_atoms(self): - """ - Atoms in rings. Not SSSR based fast algorithm. 
- """ - bonds = _skin_graph(self.not_special_connectivity) - if not bonds: - return set() - - in_rings = set() - atoms = set(bonds) - while atoms: - stack = deque([(atoms.pop(), 0, 0)]) - path = [] - seen = set() - while stack: - c, p, d = stack.pop() - if len(path) > d: - path = path[:d] - if c in in_rings: - continue - path.append(c) - seen.add(c) - - d += 1 - for n in bonds[c]: - if n == p: - continue - elif n in seen: - in_rings.update(path[path.index(n):]) - else: - stack.append((n, c, d)) - - atoms.difference_update(seen) - return in_rings + return {n: {len(r) for r in rs} for n, rs in self.atoms_rings.items()} @cached_property def rings_count(self) -> int: @@ -123,7 +75,7 @@ def rings_count(self) -> int: return sum(len(x) for x in bonds.values()) // 2 - len(bonds) + len(_connected_components(bonds)) @cached_property - def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: + def not_special_connectivity(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ Graph connectivity without special bonds. """ @@ -136,13 +88,11 @@ def not_special_connectivity(self: 'Graph') -> Dict[int, Set[int]]: return bonds @cached_property - def connected_components(self: 'Graph') -> Tuple[Tuple[int, ...], ...]: + def connected_components(self: 'MoleculeContainer') -> List[Set[int]]: """ Isolated components of single graph. E.g. salts as ion pair. """ - if not self._atoms: - return () - return tuple(tuple(x) for x in self._connected_components) + return _connected_components(self._bonds) @property def connected_components_count(self) -> int: @@ -152,18 +102,53 @@ def connected_components_count(self) -> int: return len(self.connected_components) @cached_property - def skin_graph(self: 'Graph') -> Dict[int, Set[int]]: + def skin_graph(self: 'MoleculeContainer') -> Dict[int, Set[int]]: """ Graph without terminal atoms. 
Only rings and linkers """ return _skin_graph(self._bonds) @cached_property - def _connected_components(self: 'Graph') -> List[Set[int]]: - return _connected_components(self._bonds) + def rings_graph(self: 'MoleculeContainer'): + """ + Graph of rings. Linkers are not included. Special bonds are considered. + """ + bonds = {n: ms.copy() for n, ms in self.skin_graph.items()} + if not bonds: + return {} + + in_rings = set() + atoms = set(bonds) + while atoms: + stack = deque([(atoms.pop(), 0, 0)]) + path = [] + seen = set() + while stack: + c, p, d = stack.pop() + if len(path) > d: + path = path[:d] + if c in in_rings: + continue + path.append(c) + seen.add(c) + + d += 1 + for n in bonds[c]: + if n == p: + continue + elif n in seen: + in_rings.update(path[path.index(n):]) + else: + stack.append((n, c, d)) + + atoms.difference_update(seen) + for n in bonds.keys() - in_rings: + for m in bonds.pop(n): + bonds[m].discard(n) + return bonds -def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> Tuple[Tuple[int, ...], ...]: +def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> List[Tuple[int, ...]]: """ Smallest Set of Smallest Rings of any adjacency matrix. Number of rings required. 
@@ -529,7 +514,7 @@ def _connected_rings(rings, seen_rings): def _rings_filter(rings, n_sssr): c = next(rings) if n_sssr == 1: - return c, + return [c] seen_rings = {c} sssr_atoms = set(c) @@ -545,7 +530,7 @@ def _rings_filter(rings, n_sssr): sssr_atoms.update(c) sssr.append(c) if len(sssr) == n_sssr: - return tuple(sssr) + return sssr # now we have set of plug rings (cuban fullerene), besiege rings and condensed trash seen_rings = {c: _ring_adjacency(c) for c in seen_rings} # prepare adjacency @@ -558,7 +543,7 @@ def _rings_filter(rings, n_sssr): condensed_rings = _connected_rings(condensed_rings, seen_rings) sssr.append(c) if len(sssr) == n_sssr: - return tuple(sorted(sssr, key=len)) + return sorted(sssr, key=len) raise ImplementationError('SSSR count not reached') diff --git a/chython/algorithms/smiles.py b/chython/algorithms/smiles.py index e4b8dfdd..22bc5e6b 100644 --- a/chython/algorithms/smiles.py +++ b/chython/algorithms/smiles.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2024 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # Copyright 2019 Timur Gimadiev # This file is part of chython. 
# @@ -21,7 +21,6 @@ from CachedMethods import cached_method from collections import defaultdict from functools import cached_property -from hashlib import sha512 from heapq import heappop, heappush from itertools import product from random import random @@ -29,13 +28,12 @@ if TYPE_CHECKING: - from chython import MoleculeContainer, CGRContainer, QueryContainer + from chython import MoleculeContainer, CGRContainer from chython.containers.graph import Graph charge_str = {-4: '-4', -3: '-3', -2: '-2', -1: '-', 0: '0', 1: '+', 2: '+2', 3: '+3', 4: '+4'} order_str = {1: '-', 2: '=', 3: '#', 4: ':', 8: '~', None: '.'} organic_set = {'C', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'B'} -hybridization_str = {4: '4', 3: '1', 2: '2', 1: '3', None: 'n'} dyn_order_str = {(None, 1): '[.>-]', (None, 2): '[.>=]', (None, 3): '[.>#]', (None, 4): '[.>:]', (None, 8): '[.>~]', (1, None): '[->.]', (1, 1): '', (1, 2): '[->=]', (1, 3): '[->#]', (1, 4): '[->:]', (1, 8): '[->~]', (2, None): '[=>.]', (2, 1): '[=>-]', (2, 2): '=', (2, 3): '[=>#]', (2, 4): '[=>:]', (2, 8): '[=>~]', @@ -49,6 +47,13 @@ dyn_radical_str = {(True, True): '*', (True, False): '*>^', (False, True): '^>*'} +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 +S = 16 + class Smiles(ABC): __slots__ = () @@ -140,10 +145,6 @@ def __eq__(self, other): def __hash__(self): return hash(str(self)) - @cached_method - def __bytes__(self): - return sha512(str(self).encode()).digest() - @cached_property def smiles_atoms_order(self) -> Tuple[int, ...]: """ @@ -321,8 +322,9 @@ def _format_atom(self, n, adjacency, **kwargs): def _format_bond(self, n, m, adjacency, **kwargs): ... - def _smiles_order(self: 'Graph', stereo=True) -> Callable: - return self.atoms_order.__getitem__ + @abstractmethod + def _smiles_order(self, stereo=True) -> Callable: + ... def _format_cxsmiles(self, order) -> Optional[str]: ... 
@@ -374,7 +376,7 @@ def sticky_smiles(self: Union['MoleculeContainer', 'MoleculeSmiles'], left: int, smiles = smiles[2:] return ''.join(smiles) - def _smiles_order(self: 'MoleculeContainer', stereo=True) -> Callable: + def _smiles_order(self: 'MoleculeContainer', stereo=True): if stereo: return self._chiral_morgan.__getitem__ else: @@ -382,15 +384,11 @@ def _smiles_order(self: 'MoleculeContainer', stereo=True) -> Callable: def _format_cxsmiles(self: 'MoleculeContainer', order): if self.is_radical: - radical = self._radicals - return f'|^1:{",".join(str(n) for n, m in enumerate(order) if radical[m])}|' + return f'|^1:{",".join(str(n) for n, m in enumerate(order) if self._atoms[m].is_radical)}|' return def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - ih = self._hydrogens[n] - hyb = self.hybridization(n) smi = ['', # [ str(atom.isotope) if atom.isotope else '', # isotope @@ -401,66 +399,66 @@ def _format_atom(self: 'MoleculeContainer', n, adjacency, **kwargs): f':{n}' if kwargs.get('mapping', False) else '', # mapping ''] # ] - if kwargs.get('stereo', True): - if n in self._atoms_stereo: - if ih and next(x for x in adjacency) == n: # first atom in smiles has reversed chiral mark - smi[3] = '@@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@' - else: - smi[3] = '@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@@' - elif n in self._allenes_stereo: + if atom.stereo is not None and kwargs.get('stereo', True): + # allene + if n in self._stereo_allenes_terminals: t1, t2 = self._stereo_allenes_terminals[n] - env = self._stereo_allenes[n] + env = self.stereogenic_allenes[n] n1 = next(x for x in adjacency[t1] if x in env) n2 = next(x for x in adjacency[t2] if x in env) smi[3] = '@' if self._translate_allene_sign(n, n1, n2) else '@@' - elif charge and kwargs.get('charges', True): - smi[5] = charge_str[charge] - elif charge and kwargs.get('charges', True): - smi[5] = 
charge_str[charge] + # tetrahedron + elif atom.implicit_hydrogens and next(x for x in adjacency) == n: + # first atom in smiles has reversed chiral mark + smi[3] = '@@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@' + else: + smi[3] = '@' if self._translate_tetrahedron_sign(n, adjacency[n]) else '@@' - if any(smi) or atom.atomic_symbol not in organic_set or self._radicals[n] or kwargs.get('hydrogens', False): + if atom.charge and kwargs.get('charges', True): + smi[5] = charge_str[atom.charge] + + if any(smi) or atom.atomic_symbol not in organic_set or atom.is_radical or kwargs.get('hydrogens', False): smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' - elif ih: - smi[4] = f'H{ih}' - elif hyb == 4 and ih and atom.atomic_number in (5, 7, 15): # pyrrole + elif atom.implicit_hydrogens: + smi[4] = f'H{atom.implicit_hydrogens}' + elif atom.hybridization == 4 and atom.implicit_hydrogens and atom in (B, N, P): # pyrrole smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' else: - smi[4] = f'H{ih}' - elif not ih and atom.atomic_number in (5, 6, 15, 16) and not self.not_special_connectivity[n]: + smi[4] = f'H{atom.implicit_hydrogens}' + elif not atom.implicit_hydrogens and atom in (B, C, P, S) and not self.not_special_connectivity[n]: # elemental B, C, P, S smi[0] = '[' smi[-1] = ']' - elif ih and atom.atomic_number == 15 and hyb != 1: + elif atom.implicit_hydrogens and atom == P and atom.hybridization != 1: smi[0] = '[' smi[-1] = ']' - if ih == 1: + if atom.implicit_hydrogens == 1: smi[4] = 'H' else: - smi[4] = f'H{ih}' + smi[4] = f'H{atom.implicit_hydrogens}' - if kwargs.get('aromatic', True) and hyb == 4: + if kwargs.get('aromatic', True) and atom.hybridization == 4: smi[2] = atom.atomic_symbol.lower() else: smi[2] = atom.atomic_symbol return ''.join(smi) - def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): + def _format_bond(self: Union['MoleculeContainer', 
'MoleculeSmiles'], n, m, adjacency, **kwargs): if not kwargs.get('bonds', True): return '' - bonds = self._bonds - order = bonds[n][m].order - if order == 4: + bond = self._bonds[n][m] + if bond == 4: if kwargs.get('aromatic', True): return '' return ':' - elif order == 1: # cis-trans /\ - if kwargs.get('aromatic', True) and self.hybridization(n) == self.hybridization(m) == 4: + elif bond == 1: # cis-trans /\ + if kwargs.get('aromatic', True) and self._atoms[n].hybridization == self._atoms[m].hybridization == 4: return '-' if kwargs.get('stereo', True): if 'cache' in adjacency: @@ -471,27 +469,28 @@ def _format_bond(self: 'MoleculeContainer', n, m, adjacency, **kwargs): if (x := ct_map.get((n, m))) is not None: return '/' if x else '\\' return '' - elif order == 2: + elif bond == 2: return '=' - elif order == 3: + elif bond == 3: return '#' - else: # order == 8 + else: # bond == 8 return '~' - def __ct_map(self, adjacency): + def __ct_map(self: 'MoleculeContainer', adjacency): + stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} + if not stereo_bonds: + return {} ct_map = {} - cts = self._cis_trans_stereo - if not cts: - return ct_map + sct = self.stereogenic_cis_trans + ctc = self._stereo_cis_trans_centers ctt = self._stereo_cis_trans_terminals - sct = self._stereo_cis_trans - ctc = self._stereo_cis_trans_counterpart + ctcp = self._stereo_cis_trans_counterpart seen = set() for k, vs in adjacency.items(): seen.add(k) - if (ts := ctt.get(k)) and ts in cts: - env = sct[ts] + if (cs := ctc.get(k)) and stereo_bonds.issuperset(cs): + env = sct[ctt[k]] for v in vs: if v in env: if (k, v) in ct_map: @@ -500,11 +499,11 @@ def __ct_map(self, adjacency): s = ct_map[(k, x)] ct_map[(k, v)] = not s # X/C(/R)=, C(\X)(/R)=, C(=C(\X)/R)=C= ct_map[(v, k)] = s - if y := ctt.get(v): # =C(\X)/R=, C(\X)(/R=)= + if y := ctc.get(v): # =C(\X)/R=, C(\X)(/R=)= ct_map[v] = k seen.add(y) - elif ts in seen: - o = ctc[k] + elif cs in seen: + 
o = ctcp[k] on = ct_map[o] s = ct_map[(o, on)] if not self._translate_cis_trans_sign(k, o, v, on): @@ -512,38 +511,37 @@ def __ct_map(self, adjacency): ct_map[(k, v)] = s ct_map[k] = v ct_map[(v, k)] = not s # C/R=, R\1...C/1 - if y := ctt.get(v): + if y := ctc.get(v): ct_map[v] = k seen.add(y) else: # left entry to double bond - if y := ctt.get(v): # 1,3-diene case + if y := ctc.get(v): # 1,3-diene case ct_map[v] = k seen.add(y) ct_map[(v, k)] = True # R/C=, C\1=...R/1, C(/R=)=, C(=C(/R=))=C= ct_map[(k, v)] = False # first DOWN ct_map[k] = v - seen.add(ts) + seen.add(cs) return ct_map class CGRSmiles(Smiles): __slots__ = () + def _smiles_order(self: 'CGRContainer', stereo=True): + return self.atoms_order.__getitem__ + def _format_atom(self: 'CGRContainer', n, adjacency, **kwargs): atom = self._atoms[n] - charge = self._charges[n] - is_radical = self._radicals[n] - p_charge = self._p_charges[n] - p_is_radical = self._p_radicals[n] if atom.isotope: smi = [str(atom.isotope), atom.atomic_symbol] else: smi = [atom.atomic_symbol] - if charge or p_charge: - smi.append(dyn_charge_str[(charge, p_charge)]) - if is_radical or p_is_radical: - smi.append(dyn_radical_str[(is_radical, p_is_radical)]) + if atom.charge or atom.p_charge: + smi.append(dyn_charge_str[(atom.charge, atom.p_charge)]) + if atom.is_radical or atom.p_is_radical: + smi.append(dyn_radical_str[(atom.is_radical, atom.p_is_radical)]) if len(smi) != 1 or atom.atomic_symbol not in organic_set: smi.insert(0, '[') @@ -555,82 +553,4 @@ def _format_bond(self: 'CGRContainer', n, m, adjacency, **kwargs): return dyn_order_str[(bond.order, bond.p_order)] -class QuerySmiles(Smiles): - __slots__ = () - - def _format_cxsmiles(self: 'QueryContainer', order): - hybridization = self._hybridizations - heteroatoms = self._heteroatoms - masked = self._masked - radical = self._radicals - - hh = ['atomProp'] - cx = [] - if any(radical.values()): - cx.append(f'^1:{",".join(str(n) for n, m in enumerate(order) if radical[m])}') - - 
for n, m in enumerate(order): - if len(hb := hybridization[m]) > 1 or (hb and hb[0] != 4): - hh.append(f'{n}.hyb.{"".join(hybridization_str[x] for x in hb)}') - if ha := heteroatoms[m]: - hh.append(f'{n}.het.{"".join(str(x) for x in ha)}') - if masked[m]: - hh.append(f'{n}.msk.1') - if len(hh) > 1: - cx.append(':'.join(hh)) - if cx: - return f'|{",".join(cx)}|' - - def _format_atom(self: 'QueryContainer', n, adjacency, **kwargs): - atom = self._atoms[n] - charge = self._charges[n] - hybridization = self._hybridizations[n] - neighbors = self._neighbors[n] - hydrogens = self._hydrogens[n] - rings = self._rings_sizes[n] - - if atom.isotope: - smi = ['[', str(atom.isotope), atom.atomic_symbol] - else: - smi = ['[', atom.atomic_symbol] - - if n in self._atoms_stereo: # mark atom as chiral. it's too difficult to set correct sign - smi.append(';@?') - if n in self._allenes_stereo: - smi.append(';@?') - - if charge: - smi.append(';') - smi.append(charge_str[charge]) - - if hydrogens: # h implicit-H-count implicit hydrogens - smi.append(';') - smi.append(','.join(f'h{x}' for x in hydrogens)) - - if neighbors: # D degree explicit connections - smi.append(';') - smi.append(','.join(f'D{x}' for x in neighbors)) - - if rings: - smi.append(';') - if rings[0]: - smi.append(','.join(f'r{x}' for x in rings)) - else: - smi.append('!R') - - if len(hybridization) == 1 and hybridization[0] == 4: # only aromatic. other marks in cx extension - smi.append(';a') - - smi.append(']') - return ''.join(smi) - - def _format_bond(self: 'QueryContainer', n, m, adjacency, **kwargs): - # bond chirality skipped. too difficult to implement. 
- b = self._bonds[n][m] - s = ','.join(order_str[x] for x in b.order) - if (c := b.in_ring) is not None: - s += ';@' if c else ';!@' - return s - - -__all__ = ['MoleculeSmiles', 'CGRSmiles', 'QuerySmiles'] +__all__ = ['MoleculeSmiles', 'CGRSmiles'] diff --git a/chython/algorithms/standardize/molecule.py b/chython/algorithms/standardize/molecule.py index 89bf57f5..65ce7e85 100644 --- a/chython/algorithms/standardize/molecule.py +++ b/chython/algorithms/standardize/molecule.py @@ -25,13 +25,18 @@ from ._metal_organics import rules as metal_rules from ...containers.bonds import Bond from ...exceptions import ValenceError, ImplementationError -from ...periodictable import H +from ...periodictable import H as _H if TYPE_CHECKING: from chython import MoleculeContainer +# atomic number constants +H = 1 +C = 6 + + class Standardize: __slots__ = () @@ -50,7 +55,7 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F h, changed = self.implicify_hydrogens(_fix_stereo=False, logging=True) if fix_tautomers and (logging or keep_kekule): # thiele can change tautomeric form - hgs = self._hydrogens.copy() + hgs = {n: a.implicit_hydrogens for n, a in self.atoms()} if keep_kekule: # save bond orders bonds = [(b, b.order) for _, _, b in self.bonds()] @@ -61,12 +66,13 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F if keep_kekule and t: # restore # check ring charge/hydrogen moving - if c or fix_tautomers and hgs != self._hydrogens: # noqa + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self.atoms()): self.kekule() # we need to do full kekule again else: for b, o in bonds: # noqa - b._Bond__order = o # noqa - self.flush_cache() + b._order = o + self.flush_cache() + self.calc_labels() if logging: if k: @@ -75,13 +81,12 @@ def canonicalize(self: 'MoleculeContainer', *, fix_tautomers=True, keep_kekule=F s.append((tuple(changed), -1, 'implicified')) if t: s.append(((), -1, 'aromatized')) - if
fix_tautomers and hgs != self._hydrogens: - s.append((tuple(x for x, y in self._hydrogens.items() if hgs[x] != y), - -1, 'aromatic tautomer found')) + if fix_tautomers and (x := tuple(n for n, a in self.atoms() if hgs[n] != a.implicit_hydrogens)): + s.append((x, -1, 'aromatic tautomer found')) if c: s.append((tuple(c), -1, 'recharged')) if keep_kekule and t: - if c or fix_tautomers and hgs != self._hydrogens: + if c or fix_tautomers and any(hgs[n] != a.implicit_hydrogens for n, a in self.atoms()): s.append(((), -1, 'kekulized again')) else: s.append(((), -1, 'kekule form restored')) @@ -118,16 +123,14 @@ def standardize(self: Union['MoleculeContainer', 'Standardize'], *, logging=Fals log.extend(l) fixed.update(f) - if b := fixed.intersection(n for n, h in self._hydrogens.items() if h is None): + if b := fixed.intersection(n for n, a in self.atoms() if a.implicit_hydrogens is None): if ignore: log.append((tuple(b), -1, 'standardization failed')) else: raise ImplementationError(f'standardization leads to invalid valences: {b}') - if fixed: - self.flush_cache() - if _fix_stereo: - self.fix_stereo() + if fixed and _fix_stereo: + self.fix_stereo() if logging: if fixed: @@ -146,10 +149,7 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol changed: List[int] = [] bonds = self._bonds nsc = self.not_special_connectivity - hydrogens = self._hydrogens - charges = self._charges atoms = self._atoms - hybridization = self.hybridization if prepare_molecule: self.thiele() @@ -165,25 +165,25 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol # if not 2 neighbors and 1 hydrogen or 3 neighbors within 1st and second atoms - break atom_1, atom_2 = mapping[1], mapping[2] if len(bonds[atom_1]) == 2: - if not hydrogens[atom_1]: + if not atoms[atom_1].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_1].values()): continue if len(bonds[atom_2]) == 2: - if not hydrogens[atom_2]: + if not 
atoms[atom_2].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_2].values()): continue if fix: atom_3 = mapping[3] - charges[atom_3] = 0 + atoms[atom_3]._charge = 0 changed.append(atom_3) else: - charges[atom_1] = 0 + atoms[atom_1]._charge = 0 changed.append(atom_1) - charges[atom_2] = 1 + atoms[atom_2]._charge = 1 changed.append(atom_2) # add atoms to changed # morgan @@ -196,36 +196,36 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol seen.update(match) atom_1, atom_2 = mapping[1], mapping[2] if len(bonds[atom_1]) == 2: - if not hydrogens[atom_1]: + if not atoms[atom_1].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_1].values()): continue if len(bonds[atom_2]) == 2: - if not hydrogens[atom_2]: + if not atoms[atom_2].implicit_hydrogens: continue elif all(x == 4 for x in bonds[atom_2].values()): continue if fix: atom_3 = mapping[3] - charges[atom_3] = 0 + atoms[atom_3]._charge = 0 changed.append(atom_3) else: # remove charge from 1st N atom - charges[atom_1] = 0 + atoms[atom_1]._charge = 0 pairs.append((atom_1, atom_2, fix)) if pairs: self.__dict__.pop('atoms_order', None) # remove cached morgan for atom_1, atom_2, fix in pairs: if self.atoms_order[atom_1] > self.atoms_order[atom_2]: - charges[atom_2] = 1 + atoms[atom_2]._charge = 1 changed.append(atom_2) if not fix: changed.append(atom_1) else: - charges[atom_1] = 1 + atoms[atom_1]._charge = 1 if fix: changed.append(atom_1) del self.__dict__['atoms_order'] # remove invalid morgan @@ -233,29 +233,29 @@ def standardize_charges(self: 'MoleculeContainer', *, logging=False, prepare_mol # ferrocene fcr = [] for r in self.sssr: - if len(r) != 5 or not all(hybridization(n) == 4 for n in r): + if len(r) != 5 or not all(atoms[n].hybridization == 4 for n in r): continue - ch = [(n, x) for n in r if (x := charges[n])] + ch = [(n, x) for n in r if (x := atoms[n].charge)] if len(ch) != 1 or ch[0][1] != -1: continue ch = ch[0][0] - ca = [n for n in r if 
atoms[n].atomic_number == 6 and - (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b.order == 1 for b in bonds[n].values()))] + ca = [n for n in r if atoms[n] == C and + (len(bs := nsc[n]) == 2 or len(bs) == 3 and any(b == 1 for b in bonds[n].values()))] if len(ca) < 2 or ch not in ca: continue - charges[ch] = 0 # reset charge for morgan recalculation + atoms[ch]._charge = 0 # reset charge for morgan recalculation fcr.append(ca) changed.append(ch) if fcr: self.__dict__.pop('atoms_order', None) # remove cached morgan for ca in fcr: n = min(ca, key=self.atoms_order.get) - charges[n] = -1 + atoms[n]._charge = -1 changed.append(n) del self.__dict__['atoms_order'] # remove invalid morgan if changed: - self.flush_cache() # clear cache + self.flush_cache(keep_sssr=True, keep_components=True) # clear cache if _fix_stereo: self.fix_stereo() if logging: @@ -271,20 +271,19 @@ def remove_coordinate_bonds(self: 'MoleculeContainer', *, keep_to_terminal=True, :param keep_to_terminal: Keep any bonds to terminal hydrogens :return: removed bonds count """ - bonds = self._bonds - - ab = [(n, m) for n, m, b in self.bonds() if b.order == 8] + ab = [(n, m) for n, m, b in self.bonds() if b == 8] if keep_to_terminal: skeleton = self.not_special_connectivity - hs = {n for n, a in self._atoms.items() if a.atomic_number == 1 and not skeleton[n]} + hs = {n for n, a in self.atoms() if a == H and not skeleton[n]} ab = [(n, m) for n, m in ab if n not in hs and m not in hs] for n, m in ab: - del bonds[n][m], bonds[m][n] + self.delete_bond(n, m, _skip_calculation=True) if ab: - self.flush_cache() + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() return len(ab) @@ -299,43 +298,36 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo :param logging: return list of changed atoms. 
""" atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - plane = self._plane - hydrogens = self._hydrogens - parsed_mapping = self._parsed_mapping explicit = defaultdict(list) for n, atom in atoms.items(): - if atom.atomic_number == 1 and (atom.isotope is None or atom.isotope == 1): + if atom == H and (atom.isotope is None or atom.isotope == 1): if len(bonds[n]) > 1: raise ValenceError(f'Hydrogen atom {n} has invalid valence. Try to use remove_coordinate_bonds()') for m, b in bonds[n].items(): - if b.order == 1: - if atoms[m].atomic_number != 1: # not H-H + if b == 1: + if atoms[m] != H: # not H-H explicit[m].append(n) - elif b.order != 8: + elif b != 8: raise ValenceError(f'Hydrogen atom {n} has invalid valence {b.order}.') to_remove = set() fixed = {} for n, hs in explicit.items(): atom = atoms[n] - charge = charges[n] - is_radical = radicals[n] len_h = len(hs) for i in range(len_h, 0, -1): hi = hs[:i] explicit_sum = 0 explicit_dict = defaultdict(int) for m, bond in bonds[n].items(): - if m not in hi and bond.order != 8: + if m not in hi and bond != 8: explicit_sum += bond.order explicit_dict[(bond.order, atoms[m].atomic_number)] += 1 try: # aromatic rings don't match any rule - rules = atom.valence_rules(charge, is_radical, explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: break for s, d, h in rules: @@ -349,23 +341,15 @@ def implicify_hydrogens(self: 'MoleculeContainer', *, logging=False, _fix_stereo for n in to_remove: del atoms[n] - del charges[n] - del radicals[n] - del plane[n] - del hydrogens[n] for m in bonds.pop(n): del bonds[m][n] - try: - del parsed_mapping[n] - except KeyError: - pass for n, h in fixed.items(): - hydrogens[n] = h + atoms[n]._implicit_hydrogens = h if to_remove: - self.flush_cache() - self._conformers = [{x: y for x, y in c.items() if x not in to_remove} for c in self._conformers] # noqa + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: 
self.fix_stereo() @@ -380,26 +364,28 @@ def explicify_hydrogens(self: 'MoleculeContainer', *, start_map=None, _return_ma :return: number of added atoms """ - hydrogens = self._hydrogens + atoms = self._atoms to_add = [] - for n, h in hydrogens.items(): + for n, a in atoms.items(): try: - to_add.extend([n] * h) + to_add.extend([n] * a.implicit_hydrogens) except TypeError: raise ValenceError(f'atom {n} has valence error') if to_add: log = [] bonds = self._bonds - m = start_map + m = start_map if start_map is not None else max(atoms) + 1 for n in to_add: - m = self.add_atom(H(), m) - bonds[n][m] = bonds[m][n] = b = Bond(1) - b._attach_graph(self, n, m) - hydrogens[n] = 0 + atoms[m] = _H(implicit_hydrogens=0) + bonds[n][m] = b = Bond(1) + bonds[m] = {n: b} + atoms[n]._implicit_hydrogens = 0 log.append((n, m)) m += 1 + self.flush_cache(keep_sssr=True) + self.calc_labels() if _fix_stereo: self.fix_stereo() if _return_map: @@ -415,35 +401,33 @@ def check_valence(self: 'MoleculeContainer') -> List[int]: :return: list of invalid atoms """ - return [n for n, h in self._hydrogens.items() if h is None] # only invalid atoms have None hydrogens. + # only invalid atoms have None hydrogens. + return [n for n, a in self.atoms() if a.implicit_hydrogens is None] def clean_isotopes(self: 'MoleculeContainer') -> bool: """ Clean isotope marks from molecule. Return True if any isotope found. 
""" - atoms = self._atoms - isotopes = [x for x in atoms.values() if x.isotope] + isotopes = [a for _, a in self.atoms() if a.isotope] if isotopes: for i in isotopes: - i._Core__isotope = None - self.flush_cache() + i._isotope = None + self.flush_cache(keep_sssr=True, keep_components=True) self.fix_stereo() return True return False def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): + atoms = self._atoms bonds = self._bonds - charges = self._charges - radicals = self._radicals - calc_implicit = self._calc_implicit log = [] fixed = set() - flush = False for r, (pattern, atom_fix, bonds_fix, any_atoms, is_tautomer) in enumerate(rules): if not fix_tautomers and is_tautomer: continue + keep_sssr = keep_components = True hs = set() seen = set() for mapping in pattern.get_mapping(self, automorphism_filter=False): @@ -457,53 +441,37 @@ def __standardize(self: 'MoleculeContainer', rules, fix_tautomers): for n, (ch, ir) in atom_fix.items(): n = mapping[n] hs.add(n) - charges[n] += ch - if charges[n] > 4: - charges[n] -= ch + a = atoms[n] + a._charge += ch + if a.charge > 4: + a._charge -= ch log.append((tuple(match), r, f'bad charge formed. changes omitted: {pattern}')) break # skip changes if ir is not None: - radicals[n] = ir + a._is_radical = ir else: - for n, m, b in bonds_fix: + for n, m, bo in bonds_fix: n = mapping[n] m = mapping[m] hs.add(n) hs.add(m) if m in bonds[n]: - bonds[n][m]._Bond__order = b # noqa - if b == 8: - # expected original molecule don't contain `any` bonds or these bonds not changed - flush = True - else: - if b != 8: - flush = True - bonds[n][m] = bonds[m][n] = b = Bond(b) - b._attach_graph(self, n, m) + b = bonds[n][m] + if b == 8 or bo == 8: + keep_sssr = False + b._order = bo + else: # new bond + keep_sssr = keep_components = False + bonds[n][m] = bonds[m][n] = Bond(bo) log.append((tuple(match), r, str(pattern))) if not hs: # not matched continue - # flush cache only for changed atoms. 
- if flush: # neighbors count changed - ngb = self.__dict__['__cached_args_method_neighbors'] - for n in hs: - try: - del ngb[(n,)] - except KeyError: - pass - del self.__dict__['bonds_count'] - flush = False - # need hybridization recalculation - hyb = self.__dict__['__cached_args_method_hybridization'] - for n in hs: - try: - del hyb[(n,)] - except KeyError: # already flushed before - pass + self.flush_cache(keep_sssr=keep_sssr, keep_components=keep_components) + # recalculate isomorphism labels + self.calc_labels() for n in hs: # hydrogens count recalculation - calc_implicit(n) - del self.__dict__['_cython_compiled_structure'] + self.calc_implicit(n) fixed.update(hs) return log, fixed diff --git a/chython/algorithms/standardize/reaction.py b/chython/algorithms/standardize/reaction.py index 17128417..e6e5ddff 100644 --- a/chython/algorithms/standardize/reaction.py +++ b/chython/algorithms/standardize/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2022 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # Copyright 2021 Timur Gimadiev # Copyright 2024 Philippe Gantzer # This file is part of chython. 
@@ -50,7 +50,7 @@ def canonicalize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) if logging: return total return bool(total) @@ -76,7 +76,7 @@ def standardize(self: 'ReactionContainer', *, fix_mapping: bool = True, logging= total.extend((-1, x, -1, m) for m, x in self.fix_groups_mapping(logging=True)) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) if logging: return total return bool(total) @@ -90,10 +90,10 @@ def thiele(self: 'ReactionContainer', *, fix_tautomers=True) -> bool: """ total = False for m in self.molecules(): - if m.thiele(fix_tautomers=fix_tautomers) and not total: + if m.thiele(fix_tautomers=fix_tautomers): total = True if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: @@ -105,10 +105,10 @@ def kekule(self: 'ReactionContainer', *, buffer_size=7) -> bool: """ total = False for m in self.molecules(): - if m.kekule(buffer_size=buffer_size) and not total: + if m.kekule(buffer_size=buffer_size): total = True if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def clean_isotopes(self: 'ReactionContainer') -> bool: @@ -118,10 +118,10 @@ def clean_isotopes(self: 'ReactionContainer') -> bool: """ flag = False for m in self.molecules(): - if m.clean_isotopes() and not flag: + if m.clean_isotopes(): flag = True if flag: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return flag def clean_stereo(self: 'ReactionContainer'): @@ -130,7 +130,7 @@ def clean_stereo(self: 'ReactionContainer'): """ for m in self.molecules(): m.clean_stereo() - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) def check_valence(self: 'ReactionContainer') -> List[Tuple[int, Tuple[int, ...]]]: """ @@ -155,7 +155,7 @@ 
def implicify_hydrogens(self: 'ReactionContainer') -> int: for m in self.molecules(): total += m.implicify_hydrogens() if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def explicify_hydrogens(self: 'ReactionContainer') -> int: @@ -203,7 +203,7 @@ def explicify_hydrogens(self: 'ReactionContainer') -> int: m.remap(remap) if total: - self.flush_cache() + self.flush_cache(keep_molecule_cache=True) return total def remove_reagents(self, *, keep_reagents: bool = False, mapping: bool = True) -> bool: @@ -272,10 +272,9 @@ def __remove_reagents_rules(self: 'ReactionContainer', keep_reagents): tmp.extend(reagents_st2) reagents = tuple(tmp) if keep_reagents else () - self._ReactionContainer__reactants = tuple(reactants_st2) - self._ReactionContainer__products = tuple(products_st2) - self._ReactionContainer__reagents = reagents - self.flush_cache() + self._reactants = tuple(reactants_st2) + self._products = tuple(products_st2) + self._reagents = reagents self.fix_positions() return True @@ -307,10 +306,9 @@ def __remove_reagents_mapping(self: 'ReactionContainer', keep_reagents): reagents = tuple(tmp) if keep_reagents else () if len(reactants) != len(self.reactants) or len(products) != len(self.products) or len(reagents) != len(self.reagents): - self._ReactionContainer__reactants = tuple(reactants) - self._ReactionContainer__products = tuple(products) - self._ReactionContainer__reagents = reagents - self.flush_cache() + self._reactants = tuple(reactants) + self._products = tuple(products) + self._reagents = reagents self.fix_positions() return True return False @@ -327,7 +325,7 @@ def contract_ions(self: 'ReactionContainer') -> bool: salts = _contract_ions(anions, cations, total) if salts: neutral.extend(salts) - self._ReactionContainer__reagents = tuple(neutral) + self._reagents = tuple(neutral) changed = True else: changed = False @@ -338,7 +336,7 @@ def contract_ions(self: 'ReactionContainer') -> bool: anions_order = {frozenset(m): n 
for n, m in enumerate(anions)} cations_order = {frozenset(m): n for n, m in enumerate(cations)} neutral.extend(salts) - self._ReactionContainer__reactants = tuple(neutral) + self._reactants = tuple(neutral) changed = True else: anions_order = cations_order = {} @@ -350,11 +348,10 @@ def contract_ions(self: 'ReactionContainer') -> bool: salts = _contract_ions(anions, cations, total) if salts: neutral.extend(salts) - self._ReactionContainer__products = tuple(neutral) + self._products = tuple(neutral) changed = True if changed: - self.flush_cache() self.fix_positions() return True return False diff --git a/chython/algorithms/standardize/resonance.py b/chython/algorithms/standardize/resonance.py index 1270e3dd..593b4ef5 100644 --- a/chython/algorithms/standardize/resonance.py +++ b/chython/algorithms/standardize/resonance.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -24,6 +24,19 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +O = 8 +Si = 14 +P = 15 +S = 16 +As = 33 +Se = 34 +Te = 52 + + class Resonance: __slots__ = () @@ -35,21 +48,18 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals :param logging: return list of changed atoms. 
""" atoms = self._atoms - charges = self._charges - radicals = self._radicals bonds = self._bonds - calc_implicit = self._calc_implicit entries, exits, rads, constrains, nitrogen_cat, nitrogen_ani, sulfur_cat = self.__entries() hs = set() while len(rads) > 1: n = rads.pop() for path in self.__find_delocalize_path(n, rads, constrains, True): - radicals[n] = False + atoms[n]._is_radical = False hs.add(n) for n, m, b in path: hs.add(m) - bonds[n][m]._Bond__order = b # noqa - radicals[m] = False # noqa + bonds[n][m]._order = b + atoms[m]._is_radical = False # noqa rads.discard(m) break # path found # path not found. atom n keep as is @@ -60,29 +70,31 @@ def fix_resonance(self: Union['MoleculeContainer', 'Resonance'], *, logging=Fals if n in nitrogen_cat and m in nitrogen_ani: continue - c_m = charges[m] - 1 if m in sulfur_cat: # prevent X-[S+]=X >> X=S=X if b != 1: continue + atoms[m]._charge -= 1 else: # check cations end valence. + atoms[m]._charge -= 1 # reduce atom change and check valence try: - atoms[m].valence_rules(c_m, radicals[m], sum(int(y) for x, y in bonds[m].items() if x != l) + b) + atoms[m].valence_rules(sum(int(y) for x, y in bonds[m].items() if x != l) + b) except ValenceError: + atoms[m]._charge += 1 # roll back continue - charges[n] += 1 + # succeed! + atoms[n]._charge += 1 hs.add(n) for n, m, b in path: hs.add(m) - bonds[n][m]._Bond__order = b # noqa - charges[m] = c_m + bonds[n][m]._order = b exits.discard(m) break # path from negative atom to positive atom found. # path not found. 
keep negative atom n as is if hs: for n in hs: - calc_implicit(n) - self.flush_cache() + self.calc_implicit(n) + self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() if logging: @@ -121,13 +133,9 @@ def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains, if n not in seen and n in constrains and 1 <= (bo := b.order + diff) <= 3) def __entries(self: 'MoleculeContainer'): - hybridization = self.hybridization - neighbors = self.neighbors - charges = self._charges - radicals = self._radicals - bonds = self._bonds atoms = self._atoms - errors = {n for n, h in self._hydrogens.items() if h is None} + bonds = self._bonds + errors = {n for n, a in self.atoms() if a.implicit_hydrogens is None} transfer = set() entries = set() @@ -136,49 +144,50 @@ def __entries(self: 'MoleculeContainer'): nitrogen_cat = set() nitrogen_ani = set() sulfur_cat = set() - for n, a in atoms.items(): - if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52}: + for n, a in self.atoms(): + if a not in (B, C, N, O, Si, P, S, As, Se, Te): # filter non-organic set, halogens and aromatics continue - elif radicals[n]: + elif a.is_radical: rads.add(n) - elif charges[n] == -1: - if (lb := len(bonds[n])) == 4 and a.atomic_number == 5: # skip boron + elif a.charge == -1: + if (lb := len(bonds[n])) == 4 and a == B: # skip boron continue - elif lb == 6 and a.atomic_number == 15: # skip [P-]X6 + elif lb == 6 and a == P: # skip [P-]X6 continue if n in errors: # only valid anions accepted continue entries.add(n) - elif charges[n] == 1: + elif a.charge == 1: lb = len(bonds[n]) - if a.atomic_number == 7: + if a == N: if lb == 4: # skip ammonia continue - elif lb == 2 and hybridization(n) == 3: # skip Azide + elif lb == 2 and a.hybridization == 3: # skip Azide (n1, b1), (n2, b2) = bonds[n].items() - if b1.order == b2.order == 2 and (charges[n1] == -1 and atoms[n1].atomic_number == 7 or - charges[n2] == -1 and atoms[n2].atomic_number == 7): + an1 = 
atoms[n1] + an2 = atoms[n2] + if b1 == b2 == 2 and (an1.charge == -1 and an1 == N or an2.charge == -1 and an2 == N): continue - elif lb == 3 and hybridization(n) == 2: # X=[N+](-X)-X - prevent N-N migration + elif lb == 3 and a.hybridization == 2: # X=[N+](-X)-X - prevent N-N migration nitrogen_ani.add(n) - elif a.atomic_number == 15 and lb == 4: # skip [P+]R4 + elif a == P and lb == 4: # skip [P+]R4 continue - elif a.atomic_number == 16: - if lb == 2 and hybridization(n) == 2: # ad-hoc for X-[S+]=X + elif a == S: + if lb == 2 and a.hybridization == 2: # ad-hoc for X-[S+]=X sulfur_cat.add(n) - elif lb == 3 and hybridization(n) == 1: # ad-hoc for X-[S+](-X)-X + elif lb == 3 and a.hybridization == 1: # ad-hoc for X-[S+](-X)-X continue exits.add(n) transfer.add(n) if exits or entries: # try to move cation to nitrogen. saturation fixup. - for n, a in self._atoms.items(): - if a.atomic_number == 7 and not charges[n]: - if hybridization(n) == 1 and neighbors(n) <= 3: # any amine - potential e-donor + for n, a in self.atoms(): + if a == N and not a.charge: + if a.hybridization == 1 and a.neighbors <= 3: # any amine - potential e-donor entries.add(n) nitrogen_cat.add(n) - elif hybridization(n) == 3 and neighbors(n) == 1: # N#X-[X-] >> [N-]=X=X + elif a.hybridization == 3 and a.neighbors == 1: # N#X-[X-] >> [N-]=X=X exits.add(n) nitrogen_ani.add(n) return entries, exits, rads, transfer, nitrogen_cat, nitrogen_ani, sulfur_cat diff --git a/chython/algorithms/standardize/salts.py b/chython/algorithms/standardize/salts.py index 08a34250..48cec856 100644 --- a/chython/algorithms/standardize/salts.py +++ b/chython/algorithms/standardize/salts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -18,12 +18,18 @@ # from typing import TYPE_CHECKING, List, Tuple, Union from ._salts import acids, rules +from ...periodictable import GroupI, GroupII if TYPE_CHECKING: from chython import MoleculeContainer +# atomic number constants +H = 1 +N = 7 + + class Salts: __slots__ = () @@ -33,16 +39,20 @@ def remove_metals(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Li :param logging: return deleted atoms list. """ + atoms = self._atoms bonds = self._bonds metals = [] - for n, a in self._atoms.items(): - if a.atomic_symbol not in {7, 3, 4, 11, 12, 19, 20, 37, 38, 55, 56} and not bonds[n]: + for n, a in atoms.items(): + if not bonds[n] and (a == N or isinstance(a, (GroupI, GroupII)) and a != H): metals.append(n) if 0 < len(metals) < len(self): for n in metals: - self.delete_atom(n) + del atoms[n] + del bonds[n] + + self.flush_cache(keep_sssr=True) if logging: return metals return True @@ -64,27 +74,12 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis log.extend(c) if 0 < len(log) < len(self): # prevent singularity atoms = self._atoms - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - plane = self._plane bonds = self._bonds - parsed_mapping = self._parsed_mapping - - self._conformers.clear() # clean conformers. for n in log: del atoms[n] - del charges[n] - del radicals[n] - del hydrogens[n] - del plane[n] del bonds[n] - try: - del parsed_mapping[n] - except KeyError: - pass self.flush_cache() if logging: return log @@ -95,16 +90,14 @@ def remove_acids(self: 'MoleculeContainer', *, logging=False) -> Union[bool, Lis def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool, List[Tuple[int, int]]]: """ - Split connected S-metal/lanthanides/actinides salts to cation/anion pairs. + Split connected S-metal salts to cation/anion pairs. :param logging: return deleted bonds list. 
""" + atoms = self._atoms bonds = self._bonds - charges = self._charges - metals = [n for n, a in self._atoms.items() if a.atomic_number in - {3, 4, 11, 12, 19, 20, 37, 38, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 87, 88, - 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102}] + metals = [n for n, a in atoms.items() if isinstance(a, (GroupI, GroupII)) and a != H] if metals: acceptors = set() log = [] @@ -114,12 +107,12 @@ def split_metal_salts(self: 'MoleculeContainer', *, logging=False) -> Union[bool for n in metals: for m in acceptors & bonds[n].keys(): - if charges[n] == 4: # prevent overcharging + if atoms[n].charge == 4: # prevent overcharging break del bonds[n][m] del bonds[m][n] - charges[n] += 1 - charges[m] -= 1 + atoms[n]._charge += 1 + atoms[m]._charge -= 1 log.append((n, m)) if log: self.flush_cache() diff --git a/chython/algorithms/standardize/saturation.py b/chython/algorithms/standardize/saturation.py index df9de68a..03fa9c1e 100644 --- a/chython/algorithms/standardize/saturation.py +++ b/chython/algorithms/standardize/saturation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021, 2022 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -72,13 +72,17 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, raise ValenceError('only single bonded skeleton can be saturated') atoms = self._atoms if not reset_electrons: - expected_radicals_count = any(self._radicals.values()) + expected_radicals_count = sum(a.is_radical for a in atoms.values()) expected_charge = int(self) + if reset_electrons: + charges = {x: None for x in self} + radicals = {x: None for x in self} + else: + charges = {n: a.charge for n, a in self.atoms()} + radicals = {n: a.is_radical for n, a in self.atoms()} sat, adjacency = _find_possible_valences(atoms, neighbors_distances or self._bonds, - {x: None for x in self._atoms} if reset_electrons else self._charges, - {x: None for x in self._atoms} if reset_electrons else self._radicals, - neighbors_distances is not None) + charges, radicals, neighbors_distances is not None) charges = {} # new charge states radicals = {} # new radical states bonds = {n: {} for n in atoms} # new bonds @@ -95,8 +99,7 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, radicals[n] = r for m in env: if m not in seen: - bonds[n][m] = bonds[m][n] = b = Bond(1) - b._attach_graph(self, n, m) + bonds[n][m] = bonds[m][n] = Bond(1) else: unsaturated[n] = [(c, r, h)] else: @@ -142,8 +145,7 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, return False for n, m, b in sb: - bonds[n][m] = bonds[m][n] = b = Bond(b) - b._attach_graph(self, n, m) + bonds[n][m] = bonds[m][n] = Bond(b) for n, c, r in sa: charges[n] = c radicals[n] = r @@ -155,10 +157,14 @@ def saturate(self: 'MoleculeContainer', neighbors_distances: Optional[Dict[int, return False # reset molecule self._bonds = bonds - self._radicals = radicals - self._charges = charges - self._hydrogens = {x: 0 for x in atoms} # reset invalid hydrogens counts. 
+ for n, r in radicals.items(): + atoms[n]._is_radical = r + for n, c in charges.items(): + atoms[n]._charge = c + for a in atoms.values(): + a._implicit_hydrogens = 0 # reset invalid hydrogens counts. self.flush_cache() + self.calc_labels() if logging: if not log: # check for errors log.append('Saturated successfully') diff --git a/tests/algorithms/standardize/__init__.py b/chython/algorithms/standardize/test/__init__.py similarity index 100% rename from tests/algorithms/standardize/__init__.py rename to chython/algorithms/standardize/test/__init__.py diff --git a/tests/algorithms/standardize/test_groups.py b/chython/algorithms/standardize/test/test_groups.py similarity index 100% rename from tests/algorithms/standardize/test_groups.py rename to chython/algorithms/standardize/test/test_groups.py diff --git a/chython/algorithms/stereo.py b/chython/algorithms/stereo.py new file mode 100644 index 00000000..6cd814a2 --- /dev/null +++ b/chython/algorithms/stereo.py @@ -0,0 +1,1222 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2019-2024 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from collections import defaultdict +from functools import cached_property +from itertools import combinations, product +from logging import getLogger, INFO +from typing import Dict, Set, Tuple, Union, List, Optional, TYPE_CHECKING +from .morgan import _morgan +from ..exceptions import AtomNotFound, IsChiral, NotChiral + + +logger = getLogger('chython.stereo') +logger.setLevel(INFO) + + +if TYPE_CHECKING: + from chython import MoleculeContainer + + +# atomic number constants +H = 1 +C = 6 + +# 1 2 +# \ | +# \| +# n---3 +# / +# / +# 0 +_tetrahedron_translate = {(0, 1, 2): False, (1, 2, 0): False, (2, 0, 1): False, + (0, 2, 1): True, (1, 0, 2): True, (2, 1, 0): True, + (0, 3, 1): False, (3, 1, 0): False, (1, 0, 3): False, + (0, 1, 3): True, (1, 3, 0): True, (3, 0, 1): True, + (0, 2, 3): False, (2, 3, 0): False, (3, 0, 2): False, + (0, 3, 2): True, (3, 2, 0): True, (2, 0, 3): True, + (1, 3, 2): False, (3, 2, 1): False, (2, 1, 3): False, + (1, 2, 3): True, (2, 3, 1): True, (3, 1, 2): True} +# 2 1 +# \ / +# n---m +# / \ +# 0 3 +_alkene_translate = {(0, 1): False, (1, 0): False, (0, 3): True, (3, 0): True, + (2, 3): False, (3, 2): False, (2, 1): True, (1, 2): True} + + +def _pyramid_sign(n, u, v, w): + # + # | n / + # | |\ + # | | \ + # | /| \ + # | / u---v + # |/___\_/___ + # w + # + nx, ny, nz = n + ux, uy, uz = u + vx, vy, vz = v + wx, wy, wz = w + + q1x = ux - nx + q1y = uy - ny + q1z = uz - nz + q2x = vx - nx + q2y = vy - ny + q2z = vz - nz + q3x = wx - nx + q3y = wy - ny + q3z = wz - nz + + vol = q1x * (q2y * q3z - q2z * q3y) + q1y * (q2z * q3x - q2x * q3z) + q1z * (q2x * q3y - q2y * q3x) + if vol > 0: + return 1 + elif vol < 0: + return -1 + return 0 + + +def _cis_trans_sign(n, u, v, w): + # n w + # \ / + # u--v + # / \ + # x x + nx, ny = n + ux, uy = u + vx, vy = v + wx, wy = w + + q1x = ux - nx + q1y = uy - ny + q2x = vx - ux + q2y = vy - uy + q3x = wx - vx + q3y = wy - vy + + # cross vectors + q1q2z = q1x * q2y - q1y * q2x + q2q3z = q2x * q3y - q2y * q3x + 
+ dot = q1q2z * q2q3z + if dot > 0: + return 1 + elif dot < 0: + return -1 + return 0 + + +def _allene_sign(mark, u, v, w): + # n w + # | / + # u--v + ux, uy = u + vx, vy = v + wx, wy = w + + q2x = vx - ux + q2y = vy - uy + q3x = wx - vx + q3y = wy - vy + + # cross vectors + q2q3z = q2x * q3y - q2y * q3x + + dot = -mark * q2q3z + if dot > 0: + return 1 + elif dot < 0: + return -1 + return 0 + + +class MoleculeStereo: + __slots__ = () + + def clean_stereo(self: 'MoleculeContainer'): + """ + Remove stereo data. + """ + for _, a in self.atoms(): + a._stereo = None + for *_, b in self.bonds(): + b._stereo = None + self.flush_cache(keep_sssr=True, keep_components=True) + + @cached_property + def tetrahedrons(self: 'MoleculeContainer') -> Tuple[int, ...]: + """ + Carbon sp3 atom numbers. + """ + tetra = [] + for n, atom in self.atoms(): + if atom == C and not atom.charge and not atom.is_radical: + env = self._bonds[n] + if all(b == 1 for b in env.values()): + if sum(int(b) for b in env.values()) > 4: + continue + tetra.append(n) + return tuple(tetra) + + @cached_property + def cumulenes(self: 'MoleculeContainer') -> List[Tuple[int, ...]]: + """ + All double-bonds chains (e.g. alkenes, allenes, cumulenes). + """ + atoms = self._atoms + bonds = self._bonds + + adj = defaultdict(set) # double bonds adjacency matrix + for n, atom in atoms.items(): + if atom.is_forming_double_bonds: + adj_n = adj[n].add + for m, bond in bonds[n].items(): + if bond == 2 and atoms[m].is_forming_double_bonds: + adj_n(m) + if not adj: + return [] + + terminals = [x for x, y in adj.items() if len(y) == 1] # list to keep atoms order! + cumulenes = [] + while terminals: + n = terminals.pop(0) + m = adj[n].pop() + path = [n, m] + while m not in terminals: + if len(bonds[m]) > 2: # not cumulene. SO3, SO4- etc. + cumulenes.extend(zip(path, path[1:])) # keep single double bonds instead of cumulene chain. 
+ break + adj_m = adj[m] + adj_m.discard(n) + n, m = m, adj_m.pop() + path.append(m) + else: + terminals.remove(m) + adj[m].pop() + cumulenes.append(tuple(path)) + return cumulenes + + @cached_property + def stereogenic_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, int]]]: + """ + Tetrahedrons which contains at least 3 non-hydrogen neighbors and corresponding neighbors order. + """ + # 2 + # | + # 1--K--3 + # | + # 4? + atoms = self._atoms + bonds = self._bonds + tetrahedrons = {} + for n in self.tetrahedrons: + if any(not atoms[x].is_forming_single_bonds for x in bonds[n]): + continue # skip metal-carbon complexes + env = tuple(x for x in bonds[n] if atoms[x] != H) + if len(env) in (3, 4): + tetrahedrons[n] = env + return tetrahedrons + + @cached_property + def stereogenic_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: + """ + Cumulenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. 
+ """ + # 5 4 + # \ / + # 2---3 + # / \ + # 1 6 + bonds = self._bonds + atoms = self._atoms + cumulenes = {} + for path in self.cumulenes: + nf = bonds[path[0]] + nl = bonds[path[-1]] + n1, m1 = path[1], path[-2] + if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 + for m, b in nf.items() if m != n1): + continue # skip X=C=C structures and metal-carbon complexes + if any(b == 3 or not atoms[m].is_forming_single_bonds and b != 8 + for m, b in nl.items() if m != m1): + continue # skip X=C=C structures and metal-carbon complexes + nn = [x for x, b in nf.items() if x != n1 and atoms[x] != H and b != 8] + mn = [x for x, b in nl.items() if x != m1 and atoms[x] != H and b != 8] + if nn and mn: + sn = nn[1] if len(nn) == 2 else None + sm = mn[1] if len(mn) == 2 else None + cumulenes[path] = (nn[0], mn[0], sn, sm) + return cumulenes + + @cached_property + def stereogenic_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: + """ + Allenes which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. + """ + return {path[len(path) // 2]: env for path, env in self.stereogenic_cumulenes.items() if len(path) % 2} + + @cached_property + def stereogenic_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: + """ + Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends and corresponding neighbors order. + """ + stereo = {} + for path, env in self.stereogenic_cumulenes.items(): + if len(path) % 2: + continue + stereo[(path[0], path[-1])] = env + return stereo + + @cached_property + def ring_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: + """ + Tetrahedrons in rings, except ring-linkers. Values are non-ring atoms. 
+ """ + out = {} + atoms_rings = self.atoms_rings + tetrahedrons = self.stereogenic_tetrahedrons + points = self.rings_linker_tetrahedrons + environment = self.not_special_connectivity + for n, r in atoms_rings.items(): + if n in tetrahedrons and n not in points: + out[n] = tuple(environment[n].difference(atoms_rings)) + return out + + @cached_property + def rings_linker_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: + """ + A dictionary where the keys are tetrahedron atoms shared between two rings (not condensed rings) and the values + are tuples representing their neighbors in the first and second rings respectively. + """ + out = {} + tetrahedrons = self.stereogenic_tetrahedrons + for n, r in self.atoms_rings.items(): + if n in tetrahedrons: + for nr, mr in combinations(r, 2): + if len(set(nr).intersection(mr)) == 1: + ni = nr.index(n) + mi = mr.index(n) + out[n] = (nr[ni - 1], nr[ni - len(nr) + 1], mr[mi - 1], mr[mi - len(mr) + 1]) + break + return out + + @cached_property + def ring_cumulenes_terminals(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: + """ + Terminal atoms of inside ring cumulenes. + """ + out = set() + ar = self.atoms_rings + for n, *_, m in self.stereogenic_cumulenes: + if n in ar and m in ar and not set(ar[n]).isdisjoint(ar[m]): + out.add((n, m)) + return out + + @cached_property + def rings_linker_cumulenes_terminals(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: + """ + Terminal atoms of cumulenes connecting two rings. Values are neighbors in first and second rings. 
+ """ + out = {} + ar = self.atoms_rings + chord = self.ring_cumulenes_terminals + for (n, *_, m), (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if n in ar and m in ar and (n, m) not in chord: + out[(n, m)] = (n1, n2, m1, m2) + return out + + @cached_property + def ring_attached_cumulenes(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Union[Tuple[int, int], Tuple[int]]]: + """ + Cumulenes attached to rings from one side. Values are out of ring neighbor atoms. + """ + ar = self.atoms_rings + out = {} + for (n, *_, m), (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if n in ar: + if m in ar: + continue + if m2: + out[(n, m)] = (m1, m2) + else: + out[(n, m)] = (m1,) + elif m in ar: + if n2: + out[(n, m)] = (n1, n2) + else: + out[(n, m)] = (n1,) + return out + + @property + def chiral_tetrahedrons(self) -> Set[int]: + """ + Chiral tetrahedrons except already labeled ones. + """ + return self.__chiral_centers[0] + + @property + def chiral_cis_trans(self) -> Set[Tuple[int, int]]: + """ + Chiral cis-trans bonds except already labeled ones. + """ + return self.__chiral_centers[1] + + @property + def chiral_allenes(self) -> Set[int]: + """ + Chiral allenes except already labeled ones. + """ + return self.__chiral_centers[2] + + def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cache=True): + """ + Add stereo data by wedge notation of bonds. Use it for tetrahedrons of allenes. 
+ + :param n: number of atom from which wedge bond started + :param m: number of atom to which wedge bond coming + :param mark: up bond is 1, down is -1 + """ + atoms = self._atoms + if n not in atoms or m not in atoms or m not in self._bonds[n]: + raise AtomNotFound + elif atoms[n].stereo is not None: + raise IsChiral + elif c := self._stereo_allenes_centers.get(n): + # allenes + if atoms[c].stereo is not None: + raise IsChiral + elif c not in self.chiral_allenes: + raise NotChiral + + t1, t2 = self._stereo_allenes_terminals[c] + order = self.stereogenic_allenes[c] + if atoms[m] == H: + if t1 == n: + m1 = order[1] + else: + t1, t2 = t2, t1 + m1 = order[0] + r = True + else: + w = order.index(m) + if w == 0: + m1 = order[1] + r = False + elif w == 1: + m1 = order[0] + t1, t2 = t2, t1 + r = False + elif w == 2: + m1 = order[1] + r = True + else: + m1 = order[0] + t1, t2 = t2, t1 + r = True + if s := _allene_sign(mark, atoms[t1].xy, atoms[t2].xy, atoms[m1].xy): + atoms[c]._stereo = s < 0 if r else s > 0 + if clean_cache: + self.flush_cache(keep_sssr=True, keep_components=True) + # tetrahedrons + elif n in self.chiral_tetrahedrons: + th = self.stereogenic_tetrahedrons[n] + am = atoms[m] + if am == H: + order = [] + for x in th: + ax = atoms[x] + order.append((ax.x, ax.y, 0)) + s = _pyramid_sign((am.x, am.y, mark), *order) + else: + order = [] + for x in th: + ax = atoms[x] + order.append((ax.x, ax.y, mark if x == m else 0)) + if len(order) == 3: + if len(self._bonds[n]) == 4: # explicit hydrogen + x = next(x for x in self._bonds[n] if x not in th) + ax = atoms[x] + s = _pyramid_sign((ax.x, ax.y, 0), *order) + else: + an = atoms[n] + s = _pyramid_sign((an.x, an.y, 0), *order) + else: + s = _pyramid_sign(order[-1], *order[:3]) + if s: + atoms[n]._stereo = s > 0 + if clean_cache: + self.flush_cache(keep_components=True, keep_sssr=True) + else: + raise NotChiral + + def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): + """ + Calculate 
cis-trans stereo bonds from given 2d coordinates. Unusable for SMILES and INCHI. + """ + atoms = self._atoms + flag = False + while self.chiral_cis_trans: + stereo = False + for nm in self.chiral_cis_trans: + n, m = nm + n1, m1, *_ = self.stereogenic_cis_trans[nm] + s = _cis_trans_sign(atoms[n1].xy, atoms[n].xy, atoms[m].xy, atoms[m1].xy) + if s: + stereo = True + i, j = self._stereo_cis_trans_centers[n] + self._bonds[i][j]._stereo = s > 0 + if stereo: + flag = True + self.flush_stereo_cache() + else: + break + if flag and clean_cache: + self.flush_cache(keep_components=True, keep_sssr=True) + + def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mark: bool, *, clean_cache=True): + """ + Add stereo data for specified neighbors bypass. Use it for tetrahedrons or allenes. + + :param n: number of tetrahedron atom or central atom of allene. + :param env: numbers of atoms with specified bypass + :param mark: clockwise or anti bypass. + + See and + """ + try: + atom = self._atoms[n] + except KeyError: + raise AtomNotFound + if atom.stereo is not None: + raise IsChiral + if not isinstance(mark, bool): + raise TypeError('stereo mark should be bool') + + if n in self.chiral_tetrahedrons: + atom._stereo = self._translate_tetrahedron_sign(n, env, mark) + if clean_cache: + self.flush_cache(keep_components=True, keep_sssr=True) + elif n in self.chiral_allenes: + atom._stereo = self._translate_allene_sign(n, *env, mark) + if clean_cache: + self.flush_cache(keep_components=True, keep_sssr=True) + else: # only tetrahedrons supported + raise NotChiral + + def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: int, mark: bool, *, + clean_cache=True): + """ + Add stereo data to cis-trans double bonds (not allenes). 
+ + n1/n=m/n2 + + :param n: number of starting atom of double bonds chain (alkenes of cumulenes) + :param m: number of ending atom of double bonds chain (alkenes of cumulenes) + :param n1: number of neighboring atom of starting atom + :param n2: number of neighboring atom of ending atom + :param mark: cis or trans + + See and int: + return sum(b.stereo is not None for *_, b in self.bonds()) + + @cached_property + def _stereo_cis_trans_centers(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal atoms to cis-trans key mapping. Key is central double bond in a cumulene chain. + """ + terminals = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = (path[i - 1], path[i]) + return terminals + + @cached_property + def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Cis-Trans terminal and central atoms to terminal pair mapping. + """ + terminals = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + i = len(path) // 2 + terminals[n] = terminals[m] = terminals[path[i]] = terminals[path[i - 1]] = (n, m) + return terminals + + @cached_property + def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: + """ + Cis-Trans terminal atoms counterparts + """ + counterpart = {} + for path in self.stereogenic_cumulenes: + if len(path) % 2: + continue + n, m = path[0], path[-1] + counterpart[n] = m + counterpart[m] = n + return counterpart + + @cached_property + def _stereo_allenes_centers(self) -> Dict[int, int]: + """ + Allene terminal atom to center mapping + """ + terminals = {} + for c, (n, m) in self._stereo_allenes_terminals.items(): + terminals[n] = terminals[m] = c + return terminals + + @cached_property + def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: + """ + Allene center atom to terminals mapping + """ + return {path[len(path) // 2]: (path[0], path[-1]) for path 
in self.stereogenic_cumulenes if len(path) % 2} + + def _translate_tetrahedron_sign(self: 'MoleculeContainer', n, env, s=None): + """ + Get sign of chiral tetrahedron atom for specified neighbors order + + :param n: stereo atom + :param env: neighbors order + :param s: if None, use existing sign else translate given to molecule + """ + if s is None: + s = self._atoms[n].stereo + if s is None: + raise KeyError + + order = self.stereogenic_tetrahedrons[n] + if len(order) == 3: + if len(env) == 4: # hydrogen atom passed to env + # hydrogen always last in order + try: + order = (*order, next(x for x in env if self._atoms[x] == H)) # see translate scheme + except StopIteration: + raise KeyError + elif len(env) != 3: # pyramid or tetrahedron expected + raise ValueError('invalid atoms list') + elif len(env) not in (3, 4): # pyramid or tetrahedron expected + raise ValueError('invalid atoms list') + + translate = tuple(order.index(x) for x in env[:3]) + if _tetrahedron_translate[translate]: + return not s + return s + + def _translate_cis_trans_sign(self: 'MoleculeContainer', n, m, nn, nm, s=None): + """ + Get sign for specified opposite neighbors + + :param n: first double bonded atom + :param m: last double bonded atom + :param nn: neighbor of first atom + :param nm: neighbor of last atom + :param s: if None, use existing sign else translate given to molecule + """ + try: + n0, n1, n2, n3 = self.stereogenic_cis_trans[(n, m)] + except KeyError: + n0, n1, n2, n3 = self.stereogenic_cis_trans[(m, n)] + n, m = m, n # in alkenes sign not order depended + nn, nm = nm, nn + + if s is None: + i, j = self._stereo_cis_trans_centers[n] + s = self._bonds[i][j].stereo + if s is None: + raise KeyError + + if nn == n0: # same start + t0 = 0 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm] == H: + t1 = 3 + else: + raise KeyError + elif nn == n1: + t0 = 1 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm] == H: + t1 = 2 + else: + raise 
KeyError + elif nn == n2 or n2 is None and self._atoms[nn] == H: + t0 = 2 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm] == H: + t1 = 3 + else: + raise KeyError + elif nn == n3 or n3 is None and self._atoms[nn] == H: + t0 = 3 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm] == H: + t1 = 2 + else: + raise KeyError + else: + raise KeyError + + if _alkene_translate[(t0, t1)]: + return not s + return s + + def _translate_allene_sign(self: 'MoleculeContainer', c, nn, nm, s=None): + """ + get sign for specified opposite neighbors + + :param c: central double bonded atom + :param nn: neighbor of first double bonded atom + :param nm: neighbor of last double bonded atom + :param s: if None, use existing sign else translate given to molecule + """ + if s is None: + s = self._atoms[c].stereo + if s is None: + raise KeyError + + n0, n1, n2, n3 = self.stereogenic_allenes[c] + if nn == n0: # same start + t0 = 0 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm] == H: + t1 = 3 + else: + raise KeyError + elif nn == n1: + t0 = 1 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm] == H: + t1 = 2 + else: + raise KeyError + elif nn == n2 or n2 is None and self._atoms[nn] == H: + t0 = 2 + if nm == n1: + t1 = 1 + elif nm == n3 or n3 is None and self._atoms[nm] == H: + t1 = 3 + else: + raise KeyError + elif nn == n3 or n3 is None and self._atoms[nn] == H: + t0 = 3 + if nm == n0: + t1 = 0 + elif nm == n2 or n2 is None and self._atoms[nm] == H: + t1 = 2 + else: + raise KeyError + else: + raise KeyError + + if _alkene_translate[(t0, t1)]: + return not s + return s + + @cached_property + def _wedge_map(self: Union['MoleculeContainer', 'MoleculeStereo']): + atoms = self._atoms + + overlap = set() + space = [] + solved = [] + seen = set() + for n, env in self.stereogenic_allenes.items(): + if atoms[n].stereo is None: + continue + term = self._stereo_allenes_terminals[n] + overlap.update(term) # 
don't allow incoming wedge to allenes terminals + orders = [(*env[:2], *term, n, True), (*env[1::-1], *term[::-1], n, True)] + if env[2]: + orders.append((env[2], env[1], *term, n, True)) + if env[3]: + orders.append((env[3], env[0], *term[::-1], n, True)) + space.append(orders) + for n, env in self.stereogenic_tetrahedrons.items(): + if atoms[n].stereo is None: + continue + overlap.add(n) # don't allow incoming wedge to stereo tetrahedrons + order = list(env) + orders = [(*order, n, False)] + for _ in range(1, len(order)): + order = order.copy() + order.append(order.pop(0)) + orders.append((*order, n, False)) + space.append(orders) + + while space: + ls = len(space) + unsolved = [] + for orders in space: + good = [] + if orders[0][-1]: + for x in orders: + x0 = x[0] + if x0 in seen or x0 not in overlap: + good.append(x) + seen.add(x[2]) + if good: + solved.append(max(good, key=lambda x: (not atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) + else: + unsolved.append(orders) + else: + for x in orders: + x0 = x[0] + if x0 in seen or x0 not in overlap: + good.append(x) + if good: + seen.add(x[-2]) + solved.append(max(good, key=lambda x: (not atoms[x[0]].in_ring, atoms[x[0]].atomic_number))) + else: + unsolved.append(orders) + space = unsolved + if len(unsolved) == ls: + break + + solved = [y for x in solved if (y := self.__wedge_sign(x))] + if not space: + return solved + + for orders in product(*space): + used = set() + wedge = [] + for order in orders: + if order[-1]: # allene + if (order[0], order[2]) in used: + break + used.add((order[2], order[0])) + wedge.append(self.__wedge_sign(order)) + else: # TH + n = order[-2] + if (order[0], n) in used: + break + used.add((n, order[0])) + wedge.append(self.__wedge_sign(order)) + else: # found + solved.extend(wedge) + return solved + logger.info('wedge stereo mapping failed') + return solved + + def __wedge_sign(self: 'MoleculeContainer', order): + if order[-1]: # allene + s = self._translate_allene_sign(order[-2], 
order[0], order[1]) + v = _allene_sign(1, self._atoms[order[2]].xy, self._atoms[order[3]].xy, self._atoms[order[1]].xy) + if not v: + logger.info(f'need 2d clean. allenes wedge stereo ambiguous for atom {order[-2]}') + if s: + return order[2], order[0], v + else: + return order[2], order[0], -v + else: # TH + n = order[-2] + s = self._translate_tetrahedron_sign(n, order[:-2]) + # need recalculation if XY changed + ao0 = self._atoms[order[0]] + ao1 = self._atoms[order[1]] + ao2 = self._atoms[order[2]] + if len(order) == 5: + an = self._atoms[n] + v = _pyramid_sign((an.x, an.y, 0), + (ao0.x, ao0.y, 1), + (ao1.x, ao1.y, 0), + (ao2.x, ao2.y, 0)) + else: + ao3 = self._atoms[order[3]] + v = _pyramid_sign((ao3.x, ao3.y, 0), + (ao0.x, ao0.y, 1), + (ao1.x, ao1.y, 0), + (ao2.x, ao2.y, 0)) + if not v: + logger.info(f'need 2d clean. tetrahedron wedge stereo ambiguous for atom {n}') + if s: + return n, order[0], v + else: + return n, order[0], -v + + @cached_property + def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: + stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} + stereo_bonds = {n for n, mb in self._bonds.items() if any(b.stereo is not None for m, b in mb.items())} + if not stereo_atoms and not stereo_bonds: + return self.atoms_order + + morgan = self.atoms_order.copy() + atoms_stereo = stereo_atoms.intersection(self.tetrahedrons) + allenes_stereo = stereo_atoms - atoms_stereo + + cis_trans_terminals = self._stereo_cis_trans_terminals + cis_trans_stereo = {cis_trans_terminals[n] for n in stereo_bonds} + + while True: + # try iteratively differentiate stereo atoms. + morgan, atoms_stereo, cis_trans_stereo, allenes_stereo, atoms_groups, cis_trans_groups, allenes_groups = \ + self.__differentiation(morgan, atoms_stereo, cis_trans_stereo, allenes_stereo) + if not atoms_groups and not cis_trans_groups and not allenes_groups: + break + # for some rings differentiation by morgan impossible. try randomly set new weights. 
+ # sometimes this will lead to pseudo chiral centers and non-unique morgan. + for group in atoms_groups: + for n in group[:len(group) // 2]: # set new weight in half of group randomly. + morgan[n] = -morgan[n] + for group in cis_trans_groups: + for n, _ in group[:len(group) // 2]: # set new weight in half of group randomly. + morgan[n] = -morgan[n] + for group in allenes_groups: + for n in group[:len(group) // 2]: # set new weight in half of group randomly. + morgan[n] = -morgan[n] + morgan = _morgan(morgan, self.int_adjacency) + return morgan + + @cached_property + def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): + atoms_rings = self.atoms_rings + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans + allenes_centers = self._stereo_allenes_centers + cis_trans_terminals = self._stereo_cis_trans_terminals + cis_trans_centers = self._stereo_cis_trans_centers + morgan = self._chiral_morgan + + # find new chiral atoms and bonds. + # tetrahedron is chiral if all its neighbors are unique. + chiral_t = {n for n, env in tetrahedrons.items() if len({morgan[x] for x in env}) == len(env)} + # tetrahedrons-linkers is chiral if in each rings neighbors are unique. + chiral_t.update(n for n, (n1, n2, m1, m2) in self.rings_linker_tetrahedrons.items() + if morgan[n1] != morgan[n2] and morgan[m1] != morgan[m2]) + + # required for axes detection. + graph = {} + stereogenic = set() + pseudo = {} + + # double bond is chiral if neighbors of each terminal atom is unique. + # ring-linkers and rings-attached also takes into account. + chiral_c = set() + chiral_a = set() + for path, (n1, m1, n2, m2) in self.stereogenic_cumulenes.items(): + if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): + n, m = path[0], path[-1] + if len(path) % 2: + chiral_a.add(path[len(path) // 2]) + else: + chiral_c.add(n) + stereogenic.add(n) + stereogenic.add(m) + # ring cumulenes always chiral. can be already added. 
+ for nm in self.ring_cumulenes_terminals: + n, m = nm + if any(len(x) < 8 for x in atoms_rings[n]): # skip small rings. + if n in chiral_c: # remove already added small rings cumulenes. + chiral_c.discard(n) + if m in chiral_c: + chiral_c.discard(m) + elif n in allenes_centers and (c := allenes_centers[n]) in chiral_a: + chiral_a.discard(c) + continue + elif nm in cis_trans: + chiral_c.add(n) + else: + chiral_a.add(allenes_centers[n]) + pseudo[m] = n + graph[n] = set() + stereogenic.add(n) + + # find chiral axes. build graph of stereogenic atoms in rings. + # atoms connected then located in same ring or cumulene. + for n, env in self.ring_tetrahedrons.items(): + if len(env) == 2: # one or zero non-ring neighbors stereogenic. + n1, n2 = env + if morgan[n1] == morgan[n2]: # only unique non-ring members required. + continue + graph[n] = set() + stereogenic.add(n) # non-linker tetrahedrons in rings - stereogenic. + for n, (n1, n2, m1, m2) in self.rings_linker_tetrahedrons.items(): + graph[n] = set() + if morgan[n1] != morgan[n2] or morgan[m1] != morgan[m2]: + stereogenic.add(n) # linkers with at least one unsymmetric ring. + for n, m in self.rings_linker_cumulenes_terminals: + graph[n] = {m} + graph[m] = {n} + # stereogenic atoms already found. + for (n, m), env in self.ring_attached_cumulenes.items(): + if len(env) == 2: + n1, n2 = env + if morgan[n1] == morgan[n2]: # only unique non-ring members required. + continue + if n in atoms_rings: + graph[n] = set() # non ring endpoints not required. + stereogenic.add(n) # mark as stereogenic + else: + graph[m] = set() + stereogenic.add(m) + + if len(graph) > 1: # add bonds to graph. bonds connects atoms in same rings and terminal atoms of cumulenes. + for n, ms in graph.items(): + for r in atoms_rings[n]: + for m in r: + if n == m: + continue + elif m in graph: + ms.add(m) + elif m in pseudo and (m := pseudo[m]) != n: + ms.add(m) + # remove not stereogenic terminals. 
+ while True: + try: + n = next(n for n, ms in graph.items() if not ms or len(ms) == 1 and n not in stereogenic) + except StopIteration: + break + for m in graph.pop(n): + graph[m].discard(n) + # update chiral atoms. + for n in graph: + if n in tetrahedrons: + chiral_t.add(n) + elif n in allenes_centers: + chiral_a.add(allenes_centers[n]) + else: + chiral_c.add(n) + + # skip already marked. + stereo_atoms = {n for n, a in self.atoms() if a.stereo is not None} + chiral_t.difference_update(stereo_atoms) + chiral_a.difference_update(stereo_atoms) + diff = set() + for n in chiral_c: + i, j = cis_trans_centers[n] + if self._bonds[i][j].stereo is None: + diff.add(cis_trans_terminals[n]) + return chiral_t, diff, chiral_a + + def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan, + atoms_stereo, cis_trans_stereo, allenes_stereo): + bonds = self.int_adjacency + + tetrahedrons = self.stereogenic_tetrahedrons + cis_trans = self.stereogenic_cis_trans + allenes = self.stereogenic_allenes + + translate_tetrahedron = self._translate_tetrahedron_sign + translate_cis_trans = self._translate_cis_trans_sign + translate_allene = self._translate_allene_sign + + while True: + morgan_update = {} + atoms_groups = [] + cis_trans_groups = [] + allenes_groups = [] + # recalculate morgan weights with taking into account existing stereo marks. + if atoms_stereo: + grouped_stereo = defaultdict(list) + for n in atoms_stereo: + grouped_stereo[morgan[n]].append(n) # collect equal stereo atoms. + for group in grouped_stereo.values(): + if not len(group) % 2: # only even number of equal stereo atoms give new stereo center. + # process only truly stereogenic. + if len(env := tetrahedrons[group[0]]) == len({morgan[x] for x in env}): + s = [n for n in group if translate_tetrahedron(n, sorted(tetrahedrons[n], key=morgan.get))] + if 0 < len(s) < len(group): # RS pair required. + for m in s: + morgan_update[m] = -morgan[m] + for n in group: # prevent checks repeating. 
+ atoms_stereo.discard(n) + else: # stereo group in rings. unambiguous environment order impossible. + atoms_groups.append(group) + + if cis_trans_stereo: + grouped_stereo = defaultdict(list) + for nm in cis_trans_stereo: + n, m = nm + if (mn := morgan[n]) <= (mm := morgan[m]): + grouped_stereo[mn].append((n, nm)) + else: + grouped_stereo[mm].append((m, nm)) + for group in grouped_stereo.values(): + if not len(group) % 2: # only even number of equal stereo bonds give new stereo center. + n1, m1, n2, m2 = cis_trans[group[0][1]] + if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): + s = [] + for x, nm in group: + n, m = nm + n1, m1, n2, m2 = cis_trans[nm] + if n2 is None: + a = n1 + else: + a = min(n1, n2, key=morgan.get) + if m2 is None: + b = m1 + else: + b = min(m1, m2, key=morgan.get) + if translate_cis_trans(n, m, a, b): + s.append(x) + if 0 < len(s) < len(group): # RS pair required. + for n in s: + morgan_update[n] = -morgan[n] + for _, nm in group: + cis_trans_stereo.discard(nm) + else: + cis_trans_groups.append(group) + + if allenes_stereo: + grouped_stereo = defaultdict(list) + for c in allenes_stereo: + grouped_stereo[morgan[c]].append(c) + for group in grouped_stereo.values(): + if not len(group) % 2: # only even number of equal stereo bonds give new stereo center. + n1, m1, n2, m2 = allenes[group[0]] + if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): + s = [] + for c in group: + n1, m1, n2, m2 = allenes[c] + if n2 is None: + a = n1 + else: + a = min(n1, n2, key=morgan.get) + if m2 is None: + b = m1 + else: + b = min(m1, m2, key=morgan.get) + if translate_allene(c, a, b): + s.append(c) + if 0 < len(s) < len(group): # RS pair required. 
+ for c in s: + morgan_update[c] = -morgan[c] + for c in group: + allenes_stereo.discard(c) + else: + allenes_groups.append(group) + if not morgan_update: + break + morgan = _morgan({**morgan, **morgan_update}, bonds) + return morgan, atoms_stereo, cis_trans_stereo, allenes_stereo, atoms_groups, cis_trans_groups, allenes_groups + + +__all__ = ['MoleculeStereo'] diff --git a/chython/algorithms/stereo/graph.py b/chython/algorithms/stereo/graph.py deleted file mode 100644 index 01dbd26e..00000000 --- a/chython/algorithms/stereo/graph.py +++ /dev/null @@ -1,449 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019-2024 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from collections import defaultdict -from functools import cached_property -from typing import Dict, Optional, Tuple, TYPE_CHECKING, Union - - -if TYPE_CHECKING: - from chython import MoleculeContainer, QueryContainer - Container = Union[MoleculeContainer, QueryContainer] - - -_heteroatoms = {5, 6, 7, 8, 14, 15, 16, 17, 33, 34, 35, 52, 53} - -# 1 2 -# \ | -# \| -# n---3 -# / -# / -# 0 -_tetrahedron_translate = {(0, 1, 2): False, (1, 2, 0): False, (2, 0, 1): False, - (0, 2, 1): True, (1, 0, 2): True, (2, 1, 0): True, - (0, 3, 1): False, (3, 1, 0): False, (1, 0, 3): False, - (0, 1, 3): True, (1, 3, 0): True, (3, 0, 1): True, - (0, 2, 3): False, (2, 3, 0): False, (3, 0, 2): False, - (0, 3, 2): True, (3, 2, 0): True, (2, 0, 3): True, - (1, 3, 2): False, (3, 2, 1): False, (2, 1, 3): False, - (1, 2, 3): True, (2, 3, 1): True, (3, 1, 2): True} -# 2 1 -# \ / -# n---m -# / \ -# 0 3 -_alkene_translate = {(0, 1): False, (1, 0): False, (0, 3): True, (3, 0): True, - (2, 3): False, (3, 2): False, (2, 1): True, (1, 2): True} - -# allowed atoms. these atoms have stable covalent bonds. -_organic_subset = {1, 5, 6, 7, 8, 9, 14, 15, 16, 17, 33, 34, 35, 52, 53, 85} - - -class Stereo: - __slots__ = () - - @cached_property - def cumulenes(self) -> Tuple[Tuple[int, ...], ...]: - """ - Alkenes, allenes and cumulenes atoms numbers. - """ - return tuple(self._cumulenes()) - - @cached_property - def tetrahedrons(self: 'Container') -> Tuple[int, ...]: - """ - Carbon sp3 atoms numbers. - """ - atoms = self._atoms - bonds = self._bonds - charges = self._charges - radicals = self._radicals - - tetra = [] - for n, atom in atoms.items(): - if atom.atomic_number == 6 and not charges[n] and not radicals[n]: - env = bonds[n] - if all(int(x) == 1 for x in env.values()): - if sum(int(x) for x in env.values()) > 4: - continue - tetra.append(n) - return tuple(tetra) - - def clean_stereo(self: 'Container'): - """ - Remove stereo data. 
- """ - self._atoms_stereo.clear() - self._allenes_stereo.clear() - self._cis_trans_stereo.clear() - self.flush_cache() - - def get_mapping(self: 'Container', other: 'Container', **kwargs): - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo - cis_trans_stereo = self._cis_trans_stereo - if atoms_stereo or allenes_stereo or cis_trans_stereo: - other_atoms_stereo = other._atoms_stereo - other_allenes_stereo = other._allenes_stereo - other_cis_trans_stereo = other._cis_trans_stereo - other_translate_tetrahedron_sign = other._translate_tetrahedron_sign - other_translate_allene_sign = other._translate_allene_sign - other_translate_cis_trans_sign = other._translate_cis_trans_sign - - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes = self._stereo_allenes - - for mapping in super().get_mapping(other, **kwargs): - for n, s in atoms_stereo.items(): - m = mapping[n] - if m not in other_atoms_stereo: # self stereo atom not stereo in other - break - # translate stereo mark in other in order of self tetrahedron - if other_translate_tetrahedron_sign(m, [mapping[x] for x in tetrahedrons[n]]) != s: - break - else: - for n, s in allenes_stereo.items(): - m = mapping[n] - if m not in other_allenes_stereo: # self stereo allene not stereo in other - break - # translate stereo mark in other in order of self allene - nn, nm, *_ = allenes[n] - if other_translate_allene_sign(m, mapping[nn], mapping[nm]) != s: - break - else: - for nm, s in cis_trans_stereo.items(): - n, m = nm - on, om = mapping[n], mapping[m] - if (on, om) not in other_cis_trans_stereo: - if (om, on) not in other_cis_trans_stereo: - break # self stereo cis_trans not stereo in other - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(om, on, mapping[nm], mapping[nn]) != s: - break - else: - nn, nm, *_ = cis_trans[nm] - if other_translate_cis_trans_sign(on, om, mapping[nn], mapping[nm]) != s: - break - else: - yield mapping - else: - yield 
from super().get_mapping(other, **kwargs) - - def _translate_tetrahedron_sign(self: 'Container', n, env, s=None): - """ - Get sign of chiral tetrahedron atom for specified neighbors order - - :param n: stereo atom - :param env: neighbors order - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - s = self._atoms_stereo[n] - - order = self._stereo_tetrahedrons[n] - if len(order) == 3: - if len(env) == 4: # hydrogen atom passed to env - atoms = self._atoms - # hydrogen always last in order - try: - order = (*order, next(x for x in env if atoms[x].atomic_number == 1)) # see translate scheme - except StopIteration: - raise KeyError - elif len(env) != 3: # pyramid or tetrahedron expected - raise ValueError('invalid atoms list') - elif len(env) not in (3, 4): # pyramid or tetrahedron expected - raise ValueError('invalid atoms list') - - translate = tuple(order.index(x) for x in env[:3]) - if _tetrahedron_translate[translate]: - return not s - return s - - def _translate_cis_trans_sign(self: 'Container', n, m, nn, nm, s=None): - """ - Get sign for specified opposite neighbors - - :param n: first double bonded atom - :param m: last double bonded atom - :param nn: neighbor of first atom - :param nm: neighbor of last atom - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - try: - s = self._cis_trans_stereo[(n, m)] - except KeyError: - s = self._cis_trans_stereo[(m, n)] - n, m = m, n # in alkenes sign not order depended - nn, nm = nm, nn - - atoms = self._atoms - n0, n1, n2, n3 = self._stereo_cis_trans[(n, m)] - if nn == n0: # same start - t0 = 0 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n1: - t0 = 1 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - elif nn == n2 or n2 is None and atoms[nn].atomic_number == 1: - t0 = 2 - if nm 
== n1: - t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n3 or n3 is None and atoms[nn].atomic_number == 1: - t0 = 3 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - else: - raise KeyError - - if _alkene_translate[(t0, t1)]: - return not s - return s - - def _translate_allene_sign(self: 'Container', c, nn, nm, s=None): - """ - get sign for specified opposite neighbors - - :param c: central double bonded atom - :param nn: neighbor of first double bonded atom - :param nm: neighbor of last double bonded atom - :param s: if None, use existing sign else translate given to molecule - """ - if s is None: - s = self._allenes_stereo[c] - - atoms = self._atoms - n0, n1, n2, n3 = self._stereo_allenes[c] - if nn == n0: # same start - t0 = 0 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n1: - t0 = 1 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - elif nn == n2 or n2 is None and atoms[nn].atomic_number == 1: - t0 = 2 - if nm == n1: - t1 = 1 - elif nm == n3 or n3 is None and atoms[nm].atomic_number == 1: - t1 = 3 - else: - raise KeyError - elif nn == n3 or n3 is None and atoms[nn].atomic_number == 1: - t0 = 3 - if nm == n0: - t1 = 0 - elif nm == n2 or n2 is None and atoms[nm].atomic_number == 1: - t1 = 2 - else: - raise KeyError - else: - raise KeyError - - if _alkene_translate[(t0, t1)]: - return not s - return s - - def _cumulenes(self: 'Container', heteroatoms=False): - atoms = self._atoms - bonds = self._bonds - - adj = defaultdict(set) # double bonds adjacency matrix - if heteroatoms: - for n, atom in atoms.items(): - if atom.atomic_number in _heteroatoms: - adj_n = adj[n].add - for m, bond in bonds[n].items(): - if int(bond) == 2 and atoms[m].atomic_number in 
_heteroatoms: - adj_n(m) - else: - for n, atom in atoms.items(): - if atom.atomic_number == 6: - adj_n = adj[n].add - for m, bond in bonds[n].items(): - if int(bond) == 2 and atoms[m].atomic_number == 6: - adj_n(m) - if not adj: - return () - - terminals = [x for x, y in adj.items() if len(y) == 1] - cumulenes = [] - while terminals: - n = terminals.pop(0) - m = adj[n].pop() - path = [n, m] - while m not in terminals: - adj_m = adj[m] - if len(adj_m) > 2: # not cumulene. SO3 etc. - cumulenes.extend(zip(path, path[1:])) # keep single double bonds. - break - adj_m.discard(n) - n, m = m, adj_m.pop() - path.append(m) - else: - terminals.remove(m) - adj[m].pop() - cumulenes.append(tuple(path)) - return cumulenes - - @cached_property - def _stereo_cumulenes(self: 'Container') -> Dict[Tuple[int, ...], Tuple[int, int, Optional[int], Optional[int]]]: - """ - Cumulenes which contains at least one non-hydrogen neighbor on both ends - """ - # 5 4 - # \ / - # 2---3 - # / \ - # 1 6 - bonds = self._bonds - atoms = self._atoms - cumulenes = {} - for path in self.cumulenes: - nf = bonds[path[0]] - nl = bonds[path[-1]] - n1, m1 = path[1], path[-2] - if any(b.order == 3 or atoms[m].atomic_number not in _organic_subset and b.order != 8 - for m, b in nf.items() if m != n1): - continue # skip X=C=C structures and metal-carbon complexes - if any(b.order == 3 or atoms[m].atomic_number not in _organic_subset and b.order != 8 - for m, b in nl.items() if m != m1): - continue # skip X=C=C structures and metal-carbon complexes - nn = [x for x, b in nf.items() if x != n1 and atoms[x].atomic_number != 1 and b.order != 8] - mn = [x for x, b in nl.items() if x != m1 and atoms[x].atomic_number != 1 and b.order != 8] - if nn and mn: - sn = nn[1] if len(nn) == 2 else None - sm = mn[1] if len(mn) == 2 else None - cumulenes[path] = (nn[0], mn[0], sn, sm) - return cumulenes - - @cached_property - def _stereo_tetrahedrons(self: 'Container') -> Dict[int, Union[Tuple[int, int, int], Tuple[int, int, int, 
int]]]: - """ - Tetrahedrons which contains at least 3 non-hydrogen neighbors - """ - # 2 - # | - # 1--K--3 - # | - # 4? - atoms = self._atoms - bonds = self._bonds - tetrahedrons = {} - for n in self.tetrahedrons: - if any(atoms[x].atomic_number not in _organic_subset for x in bonds[n]): - continue # skip metal-carbon complexes - env = tuple(x for x in bonds[n] if atoms[x].atomic_number != 1) - if len(env) in (3, 4): - tetrahedrons[n] = env - return tetrahedrons - - @cached_property - def _stereo_cis_trans(self) -> Dict[Tuple[int, int], Tuple[int, int, Optional[int], Optional[int]]]: - """ - Cis-trans bonds which contains at least one non-hydrogen neighbor on both ends - """ - return {(n, m): env for (n, *mid, m), env in self._stereo_cumulenes.items() if not len(mid) % 2} - - @cached_property - def _stereo_cis_trans_paths(self) -> Dict[Tuple[int, int], Tuple[int, ...]]: - return {(path[0], path[-1]): path for path in self._stereo_cumulenes if not len(path) % 2} - - @cached_property - def _stereo_cis_trans_terminals(self) -> Dict[int, Tuple[int, int]]: - """ - Cis-Trans terminal atoms to cis-trans key mapping - """ - terminals = {} - for nm in self._stereo_cis_trans_paths: - n, m = nm - terminals[n] = terminals[m] = nm - return terminals - - @cached_property - def _stereo_cis_trans_counterpart(self) -> Dict[int, int]: - """ - Cis-Trans terminal atoms counterparts - """ - counterpart = {} - for nm in self._stereo_cis_trans_paths: - n, m = nm - counterpart[n] = m - counterpart[m] = n - return counterpart - - @cached_property - def _stereo_allenes(self) -> Dict[int, Tuple[int, int, Optional[int], Optional[int]]]: - """ - Allenes which contains at least one non-hydrogen neighbor on both ends - """ - return {path[len(path) // 2]: env for path, env in self._stereo_cumulenes.items() if len(path) % 2} - - @cached_property - def _stereo_allenes_centers(self) -> Dict[int, int]: - """ - Allene terminal atom to center mapping - """ - terminals = {} - for c, (n, m) in 
self._stereo_allenes_terminals.items(): - terminals[n] = terminals[m] = c - return terminals - - @cached_property - def _stereo_allenes_terminals(self) -> Dict[int, Tuple[int, int]]: - """ - Allene center atom to terminals mapping - """ - return {c: (path[0], path[-1]) for c, path in self._stereo_allenes_paths.items()} - - @cached_property - def _stereo_allenes_paths(self) -> Dict[int, Tuple[int, ...]]: - return {path[len(path) // 2]: path for path in self._stereo_cumulenes if len(path) % 2} - - -__all__ = ['Stereo'] diff --git a/chython/algorithms/stereo/molecule.py b/chython/algorithms/stereo/molecule.py deleted file mode 100644 index 016df003..00000000 --- a/chython/algorithms/stereo/molecule.py +++ /dev/null @@ -1,809 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019-2023 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from collections import defaultdict -from functools import cached_property -from itertools import combinations, product -from logging import getLogger, INFO -from typing import Dict, Set, Tuple, Union, TYPE_CHECKING -from .graph import Stereo -from ..morgan import _morgan -from ...exceptions import AtomNotFound, IsChiral, NotChiral - - -logger = getLogger('chython.stereo') -logger.setLevel(INFO) - - -if TYPE_CHECKING: - from chython import MoleculeContainer - - -def _pyramid_sign(n, u, v, w): - # - # | n / - # | |\ - # | | \ - # | /| \ - # | / u---v - # |/___\_/___ - # w - # - nx, ny, nz = n - ux, uy, uz = u - vx, vy, vz = v - wx, wy, wz = w - - q1x = ux - nx - q1y = uy - ny - q1z = uz - nz - q2x = vx - nx - q2y = vy - ny - q2z = vz - nz - q3x = wx - nx - q3y = wy - ny - q3z = wz - nz - - vol = q1x * (q2y * q3z - q2z * q3y) + q1y * (q2z * q3x - q2x * q3z) + q1z * (q2x * q3y - q2y * q3x) - if vol > 0: - return 1 - elif vol < 0: - return -1 - return 0 - - -def _cis_trans_sign(n, u, v, w): - # n w - # \ / - # u--v - # / \ - # x x - nx, ny = n - ux, uy = u - vx, vy = v - wx, wy = w - - q1x = ux - nx - q1y = uy - ny - q2x = vx - ux - q2y = vy - uy - q3x = wx - vx - q3y = wy - vy - - # cross vectors - q1q2z = q1x * q2y - q1y * q2x - q2q3z = q2x * q3y - q2y * q3x - - dot = q1q2z * q2q3z - if dot > 0: - return 1 - elif dot < 0: - return -1 - return 0 - - -def _allene_sign(mark, u, v, w): - # n w - # | / - # u--v - ux, uy = u - vx, vy = v - wx, wy = w - - q2x = vx - ux - q2y = vy - uy - q3x = wx - vx - q3y = wy - vy - - # cross vectors - q2q3z = q2x * q3y - q2y * q3x - - dot = -mark * q2q3z - if dot > 0: - return 1 - elif dot < 0: - return -1 - return 0 - - -class MoleculeStereo(Stereo): - __slots__ = () - - def add_wedge(self: 'MoleculeContainer', n: int, m: int, mark: int, *, clean_cache=True): - """ - Add stereo data by wedge notation of bonds. Use it for tetrahedrons of allenes. 
- - :param n: number of atom from which wedge bond started - :param m: number of atom to which wedge bond coming - :param mark: up bond is 1, down is -1 - """ - if n not in self._atoms: - raise AtomNotFound - if n in self._atoms_stereo: - raise IsChiral - - plane = self._plane - if n in self._chiral_tetrahedrons: - if m not in self._bonds[n]: - raise AtomNotFound - th = self._stereo_tetrahedrons[n] - if self._atoms[m].atomic_number == 1: - s = _pyramid_sign((*plane[m], mark), *((*plane[x], 0) for x in th)) - else: - order = [(*plane[x], mark if x == m else 0) for x in th] - if len(order) == 3: - if len(self._bonds[n]) == 4: # explicit hydrogen - x = next(x for x in self._bonds[n] if x not in th) - s = _pyramid_sign((*plane[x], 0), *order) - else: - s = _pyramid_sign((*plane[n], 0), *order) - else: - s = _pyramid_sign(order[-1], *order[:3]) - if s: - self._atoms_stereo[n] = s > 0 - if clean_cache: - self.flush_cache() - else: - c = self._stereo_allenes_centers.get(n) - if c: - if c in self._allenes_stereo: - raise IsChiral - elif c not in self._chiral_allenes: - raise NotChiral - - t1, t2 = self._stereo_allenes_terminals[c] - order = self._stereo_allenes[c] - if self._atoms[m].atomic_number == 1: - if t1 == n: - m1 = order[1] - else: - t1, t2 = t2, t1 - m1 = order[0] - r = True - else: - w = order.index(m) - if w == 0: - m1 = order[1] - r = False - elif w == 1: - m1 = order[0] - t1, t2 = t2, t1 - r = False - elif w == 2: - m1 = order[1] - r = True - else: - m1 = order[0] - t1, t2 = t2, t1 - r = True - s = _allene_sign(mark, plane[t1], plane[t2], plane[m1]) - if s: - self._allenes_stereo[c] = s < 0 if r else s > 0 - if clean_cache: - self.flush_cache() - else: - # only tetrahedrons and allenes supported - raise NotChiral - - def calculate_cis_trans_from_2d(self: 'MoleculeContainer', *, clean_cache=True): - """ - Calculate cis-trans stereo bonds from given 2d coordinates. Unusable for SMILES and INCHI. 
- """ - cis_trans_stereo = self._cis_trans_stereo - plane = self._plane - flag = False - while self._chiral_cis_trans: - stereo = {} - for nm in self._chiral_cis_trans: - n, m = nm - n1, m1, *_ = self._stereo_cis_trans[nm] - s = _cis_trans_sign(plane[n1], plane[n], plane[m], plane[m1]) - if s: - stereo[nm] = s > 0 - if stereo: - cis_trans_stereo.update(stereo) - flag = True - self.flush_stereo_cache() - else: - break - if flag and clean_cache: - self.flush_cache() - - def add_atom_stereo(self: 'MoleculeContainer', n: int, env: Tuple[int, ...], mark: bool, *, clean_cache=True): - """ - Add stereo data for specified neighbors bypass. Use it for tetrahedrons or allenes. - - :param n: number of tetrahedron atom or central atom of allene. - :param env: numbers of atoms with specified bypass - :param mark: clockwise or anti bypass. - - See and - """ - if n not in self._atoms: - raise AtomNotFound - if n in self._atoms_stereo or n in self._allenes_stereo: - raise IsChiral - if not isinstance(mark, bool): - raise TypeError('stereo mark should be bool') - - if n in self._chiral_tetrahedrons: - self._atoms_stereo[n] = self._translate_tetrahedron_sign(n, env, mark) - if clean_cache: - self.flush_cache() - elif n in self._chiral_allenes: - self._allenes_stereo[n] = self._translate_allene_sign(n, *env, mark) - if clean_cache: - self.flush_cache() - else: # only tetrahedrons supported - raise NotChiral - - def add_cis_trans_stereo(self: 'MoleculeContainer', n: int, m: int, n1: int, n2: int, mark: bool, *, - clean_cache=True): - """ - Add stereo data to cis-trans double bonds (not allenes). 
- - n1/n=m/n2 - - :param n: number of starting atom of double bonds chain (alkenes of cumulenes) - :param m: number of ending atom of double bonds chain (alkenes of cumulenes) - :param n1: number of neighboring atom of starting atom - :param n2: number of neighboring atom of ending atom - :param mark: cis or trans - - See and Set[int]: - return self.__chiral_centers[0] - - @property - def _chiral_cis_trans(self) -> Set[Tuple[int, int]]: - return self.__chiral_centers[1] - - @property - def _chiral_allenes(self) -> Set[int]: - return self.__chiral_centers[2] - - @cached_property - def _chiral_morgan(self: Union['MoleculeContainer', 'MoleculeStereo']) -> Dict[int, int]: - if not self._atoms_stereo and not self._allenes_stereo and not self._cis_trans_stereo: - return self.atoms_order - morgan = self.atoms_order.copy() - atoms_stereo = set(self._atoms_stereo) - cis_trans_stereo = set(self._cis_trans_stereo) - allenes_stereo = set(self._allenes_stereo) - while True: - # try iteratively differentiate stereo atoms. - morgan, atoms_stereo, cis_trans_stereo, allenes_stereo, atoms_groups, cis_trans_groups, allenes_groups = \ - self.__differentiation(morgan, atoms_stereo, cis_trans_stereo, allenes_stereo) - if not atoms_groups and not cis_trans_groups and not allenes_groups: - break - # for some rings differentiation by morgan impossible. try randomly set new weights. - # sometimes this will lead to pseudo chiral centers and non-unique morgan. - for group in atoms_groups: - for n in group[:len(group) // 2]: # set new weight in half of group randomly. - morgan[n] = -morgan[n] - for group in cis_trans_groups: - for n, _ in group[:len(group) // 2]: # set new weight in half of group randomly. - morgan[n] = -morgan[n] - for group in allenes_groups: - for n in group[:len(group) // 2]: # set new weight in half of group randomly. 
- morgan[n] = -morgan[n] - morgan = _morgan(morgan, self.int_adjacency) - return morgan - - @cached_property - def _rings_tetrahedrons_linkers(self: 'MoleculeContainer') -> Dict[int, Tuple[int, int, int, int]]: - """ - Ring-linkers tetrahedrons. - - Values are neighbors in first and second rings. - """ - out = {} - tetrahedrons = self._stereo_tetrahedrons - for n, r in self.atoms_rings.items(): - if n in tetrahedrons: - for nr, mr in combinations(r, 2): - if len(set(nr).intersection(mr)) == 1: - ni = nr.index(n) - mi = mr.index(n) - out[n] = (nr[ni - 1], nr[ni - len(nr) + 1], mr[mi - 1], mr[mi - len(mr) + 1]) - break - return out - - @cached_property - def _rings_tetrahedrons(self: 'MoleculeContainer') -> Dict[int, Union[Tuple[int, int], Tuple[int], Tuple]]: - """ - Tetrahedrons in rings, except ring-linkers. - - Values are out of ring atoms. - """ - out = {} - atoms_rings = self.atoms_rings - tetrahedrons = self._stereo_tetrahedrons - points = self._rings_tetrahedrons_linkers - environment = self.not_special_connectivity - for n, r in atoms_rings.items(): - if n in tetrahedrons and n not in points: - out[n] = tuple(environment[n].difference(atoms_rings)) - return out - - @cached_property - def _rings_cumulenes_linkers(self: 'MoleculeContainer') -> Dict[Tuple[int, int], Tuple[int, int, int, int]]: - """ - Ring-linkers cumulenes except chords. - - Values are neighbors in first and second rings. - """ - out = {} - ar = self.atoms_rings - chord = self._rings_cumulenes - for (n, *_, m), (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if n in ar and m in ar and (n, m) not in chord: - out[(n, m)] = (n1, n2, m1, m2) - return out - - @cached_property - def _rings_cumulenes(self: 'MoleculeContainer') -> Set[Tuple[int, int]]: - """ - Cumulenes in rings always chiral. 
- """ - out = set() - ar = self.atoms_rings - for n, *_, m in self._stereo_cumulenes: - if n in ar and m in ar and not set(ar[n]).isdisjoint(ar[m]): - out.add((n, m)) - return out - - @cached_property - def _rings_cumulenes_attached(self: 'MoleculeContainer') -> Dict[Tuple[int, int], - Union[Tuple[int, int], Tuple[int]]]: - """ - Cumulenes attached to rings. - - Values are out of ring atoms. - """ - ar = self.atoms_rings - out = {} - for (n, *_, m), (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if n in ar: - if m in ar: - continue - if m2: - out[(n, m)] = (m1, m2) - else: - out[(n, m)] = (m1,) - elif m in ar: - if n2: - out[(n, m)] = (n1, n2) - else: - out[(n, m)] = (n1,) - return out - - @cached_property - def __chiral_centers(self: Union['MoleculeStereo', 'MoleculeContainer']): - atoms_rings = self.atoms_rings - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes_centers = self._stereo_allenes_centers - cis_trans_terminals = self._stereo_cis_trans_terminals - morgan = self._chiral_morgan - - # find new chiral atoms and bonds. - # tetrahedron is chiral if all its neighbors are unique. - chiral_t = {n for n, env in tetrahedrons.items() if len({morgan[x] for x in env}) == len(env)} - # tetrahedrons-linkers is chiral if in each rings neighbors are unique. - chiral_t.update(n for n, (n1, n2, m1, m2) in self._rings_tetrahedrons_linkers.items() - if morgan[n1] != morgan[n2] and morgan[m1] != morgan[m2]) - - # required for axes detection. - graph = {} - stereogenic = set() - pseudo = {} - - # double bond is chiral if neighbors of each terminal atom is unique. - # ring-linkers and rings-attached also takes into account. 
- chiral_c = set() - chiral_a = set() - for path, (n1, m1, n2, m2) in self._stereo_cumulenes.items(): - if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): - n, m = path[0], path[-1] - if len(path) % 2: - chiral_a.add(path[len(path) // 2]) - else: - chiral_c.add((n, m)) - stereogenic.add(n) - stereogenic.add(m) - # ring cumulenes always chiral. can be already added. - for nm in self._rings_cumulenes: - n, m = nm - if any(len(x) < 8 for x in atoms_rings[n]): # skip small rings. - if nm in chiral_c: # remove already added small rings cumulenes. - chiral_c.discard(nm) - elif n in allenes_centers and (c := allenes_centers[n]) in chiral_a: - chiral_a.discard(c) - continue - elif nm in cis_trans: - chiral_c.add(nm) - else: - chiral_a.add(allenes_centers[n]) - pseudo[m] = n - graph[n] = set() - stereogenic.add(n) - - # find chiral axes. build graph of stereogenic atoms in rings. - # atoms connected then located in same ring or cumulene. - for n, env in self._rings_tetrahedrons.items(): - if len(env) == 2: # one or zero non-ring neighbors stereogenic. - n1, n2 = env - if morgan[n1] == morgan[n2]: # only unique non-ring members required. - continue - graph[n] = set() - stereogenic.add(n) # non-linker tetrahedrons in rings - stereogenic. - for n, (n1, n2, m1, m2) in self._rings_tetrahedrons_linkers.items(): - graph[n] = set() - if morgan[n1] != morgan[n2] or morgan[m1] != morgan[m2]: - stereogenic.add(n) # linkers with at least one unsymmetric ring. - for n, m in self._rings_cumulenes_linkers: - graph[n] = {m} - graph[m] = {n} - # stereogenic atoms already found. - for (n, m), env in self._rings_cumulenes_attached.items(): - if len(env) == 2: - n1, n2 = env - if morgan[n1] == morgan[n2]: # only unique non-ring members required. - continue - if n in atoms_rings: - graph[n] = set() # non ring endpoints not required. - stereogenic.add(n) # mark as stereogenic - else: - graph[m] = set() - stereogenic.add(m) - - if len(graph) > 1: # add bonds to graph. 
bonds connects atoms in same rings and terminal atoms of cumulenes. - for n, ms in graph.items(): - for r in atoms_rings[n]: - for m in r: - if n == m: - continue - elif m in graph: - ms.add(m) - elif m in pseudo and (m := pseudo[m]) != n: - ms.add(m) - # remove not stereogenic terminals. - while True: - try: - n = next(n for n, ms in graph.items() if not ms or len(ms) == 1 and n not in stereogenic) - except StopIteration: - break - for m in graph.pop(n): - graph[m].discard(n) - # update chiral atoms. - for n in graph: - if n in tetrahedrons: - chiral_t.add(n) - elif n in allenes_centers: - chiral_a.add(allenes_centers[n]) - else: - chiral_c.add(cis_trans_terminals[n]) - - # skip already marked. - chiral_t.difference_update(self._atoms_stereo) - chiral_a.difference_update(self._allenes_stereo) - chiral_c.difference_update(self._cis_trans_stereo) - return chiral_t, chiral_c, chiral_a - - def __differentiation(self: Union['MoleculeStereo', 'MoleculeContainer'], morgan, - atoms_stereo, cis_trans_stereo, allenes_stereo): - bonds = self.int_adjacency - - tetrahedrons = self._stereo_tetrahedrons - cis_trans = self._stereo_cis_trans - allenes = self._stereo_allenes - - translate_tetrahedron = self._translate_tetrahedron_sign - translate_cis_trans = self._translate_cis_trans_sign - translate_allene = self._translate_allene_sign - - while True: - morgan_update = {} - atoms_groups = [] - cis_trans_groups = [] - allenes_groups = [] - # recalculate morgan weights with taking into account existing stereo marks. - if atoms_stereo: - grouped_stereo = defaultdict(list) - for n in atoms_stereo: - grouped_stereo[morgan[n]].append(n) # collect equal stereo atoms. - for group in grouped_stereo.values(): - if not len(group) % 2: # only even number of equal stereo atoms give new stereo center. - # process only truly stereogenic. 
- if len(env := tetrahedrons[group[0]]) == len({morgan[x] for x in env}): - s = [n for n in group if translate_tetrahedron(n, sorted(tetrahedrons[n], key=morgan.get))] - if 0 < len(s) < len(group): # RS pair required. - for m in s: - morgan_update[m] = -morgan[m] - for n in group: # prevent checks repeating. - atoms_stereo.discard(n) - else: # stereo group in rings. unambiguous environment order impossible. - atoms_groups.append(group) - - if cis_trans_stereo: - grouped_stereo = defaultdict(list) - for nm in cis_trans_stereo: - n, m = nm - if (mn := morgan[n]) <= (mm := morgan[m]): - grouped_stereo[mn].append((n, nm)) - else: - grouped_stereo[mm].append((m, nm)) - for group in grouped_stereo.values(): - if not len(group) % 2: # only even number of equal stereo bonds give new stereo center. - n1, m1, n2, m2 = cis_trans[group[0][1]] - if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): - s = [] - for x, nm in group: - n, m = nm - n1, m1, n2, m2 = cis_trans[nm] - if n2 is None: - a = n1 - else: - a = min(n1, n2, key=morgan.get) - if m2 is None: - b = m1 - else: - b = min(m1, m2, key=morgan.get) - if translate_cis_trans(n, m, a, b): - s.append(x) - if 0 < len(s) < len(group): # RS pair required. - for n in s: - morgan_update[n] = -morgan[n] - for _, nm in group: - cis_trans_stereo.discard(nm) - else: - cis_trans_groups.append(group) - - if allenes_stereo: - grouped_stereo = defaultdict(list) - for c in allenes_stereo: - grouped_stereo[morgan[c]].append(c) - for group in grouped_stereo.values(): - if not len(group) % 2: # only even number of equal stereo bonds give new stereo center. 
- n1, m1, n2, m2 = allenes[group[0]] - if morgan[n1] != morgan.get(n2, 0) and morgan[m1] != morgan.get(m2, 0): - s = [] - for c in group: - n1, m1, n2, m2 = allenes[c] - if n2 is None: - a = n1 - else: - a = min(n1, n2, key=morgan.get) - if m2 is None: - b = m1 - else: - b = min(m1, m2, key=morgan.get) - if translate_allene(c, a, b): - s.append(c) - if 0 < len(s) < len(group): # RS pair required. - for c in s: - morgan_update[c] = -morgan[c] - for c in group: - allenes_stereo.discard(c) - else: - allenes_groups.append(group) - if not morgan_update: - break - morgan = _morgan({**morgan, **morgan_update}, bonds) - return morgan, atoms_stereo, cis_trans_stereo, allenes_stereo, atoms_groups, cis_trans_groups, allenes_groups - - -__all__ = ['MoleculeStereo'] diff --git a/chython/algorithms/tautomers/__init__.py b/chython/algorithms/tautomers/__init__.py index 7a628c6d..e180eaef 100644 --- a/chython/algorithms/tautomers/__init__.py +++ b/chython/algorithms/tautomers/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # Copyright 2020 Nail Samikaev # This file is part of chython. # @@ -51,47 +51,25 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar """ if limit < 1: raise ValueError('limit should be greater or equal 1') - - has_stereo = bool(self._atoms_stereo or self._allenes_stereo or self._cis_trans_stereo) counter = 0 - copy = self.copy() - copy.clean_stereo() - # sssr, neighbors and heteroatoms are same for all tautomers. - # prevent recalculation by sharing cache. 
- self.__set_cache(copy) + copy = self.copy(keep_sssr=True, keep_components=True) if prepare_molecules: # transform to kekule form without hydrogens - k = copy.kekule() - i = copy.implicify_hydrogens(_fix_stereo=False) - if k or i: # reset cache after flush - self.__set_cache(copy) - - thiele = copy.copy() # transform to thiele to prevent duplicates and dearomatization - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): - self.__set_cache(thiele) - - # return origin structure as first tautomer - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + copy.kekule() + copy.implicify_hydrogens(_fix_stereo=False) + + # transform to thiele to prevent duplicates and dearomatization + thiele = copy.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) + yield thiele # return original structure as first tautomer seen = {thiele: None} # value is parent molecule - required for preventing migrations in sugars. # first try to neutralize if copy.neutralize(_fix_stereo=False): # found neutral form - thiele = copy.copy() - self.__set_cache(copy) # restore cache - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): - self.__set_cache(thiele) - - # return found neutral form - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + thiele = copy.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) + yield thiele counter += 1 seen[thiele] = None @@ -107,11 +85,8 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current, thiele_current = queue.popleft() for mol, ket in current._enumerate_keto_enol_tautomers(partial): - thiele = mol.copy() - self.__set_cache(mol) - self.__set_cache(thiele) - if thiele.thiele(fix_tautomers=False): # reset cache after flush_cache. 
- self.__set_cache(thiele) + thiele = mol.copy(keep_sssr=True, keep_components=True) + thiele.thiele(fix_tautomers=False) if thiele not in seen: seen[thiele] = current @@ -124,10 +99,7 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar queue = deque([(mol, thiele)]) new_queue = [thiele] copy = mol # new entry point. - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + yield thiele break if keep_sugars and current is not copy and ket: # prevent carbonyl migration in sugars. skip entry point. @@ -138,10 +110,7 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar queue.append((mol, thiele)) new_queue.append(thiele) - if has_stereo: - yield self.__set_stereo(thiele.copy()) - else: - yield thiele + yield thiele counter += 1 if counter == limit: return @@ -152,15 +121,11 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current = queue.popleft() for mol in current._enumerate_hetero_arene_tautomers(): - self.__set_cache(mol) if mol not in seen: seen[mol] = None queue.append(mol) new_queue.append(mol) # new hetero-arenes also should be included to this list. 
- if has_stereo: - yield self.__set_stereo(mol.copy()) - else: - yield mol + yield mol counter += 1 if counter == limit: return @@ -171,14 +136,10 @@ def enumerate_tautomers(self: Union['MoleculeContainer', 'Tautomers'], *, prepar while queue: current = queue.popleft() for mol in current._enumerate_zwitter_tautomers(): - self.__set_cache(mol) if mol not in seen: seen[mol] = None queue.append(mol) - if has_stereo: - yield self.__set_stereo(mol.copy()) - else: - yield mol + yield mol counter += 1 if counter == limit: return @@ -206,34 +167,5 @@ def enumerate_charged_tautomers(self: 'MoleculeContainer', *, prepare_molecules= if count == limit: return - def __set_cache(self: 'MoleculeContainer', mol): - try: - neighbors = self.__dict__['__cached_args_method_neighbors'] - except KeyError: - neighbors = self.__dict__['__cached_args_method_neighbors'] = {} - try: - heteroatoms = self.__dict__['__cached_args_method_heteroatoms'] - except KeyError: - heteroatoms = self.__dict__['__cached_args_method_heteroatoms'] = {} - try: - is_ring_bond = self.__dict__['__cached_args_method_is_ring_bond'] - except KeyError: - is_ring_bond = self.__dict__['__cached_args_method_is_ring_bond'] = {} - - mol.__dict__['sssr'] = self.sssr # thiele/kekule - mol.__dict__['ring_atoms'] = self.ring_atoms # morgan - mol.__dict__['_connected_components'] = self._connected_components # isomorphism - mol.__dict__['atoms_rings_sizes'] = self.atoms_rings_sizes # isomorphism - mol.__dict__['__cached_args_method_neighbors'] = neighbors # isomorphism - mol.__dict__['__cached_args_method_heteroatoms'] = heteroatoms # isomorphism - mol.__dict__['__cached_args_method_is_ring_bond'] = is_ring_bond # isomorphism - - def __set_stereo(self: 'MoleculeContainer', mol): - mol._atoms_stereo.update(self._atoms_stereo) - mol._allenes_stereo.update(self._allenes_stereo) - mol._cis_trans_stereo.update(self._cis_trans_stereo) - mol.fix_stereo() - return mol - __all__ = ['Tautomers'] diff --git 
a/chython/algorithms/tautomers/acid_base.py b/chython/algorithms/tautomers/acid_base.py index bb1a672f..4323b0c8 100644 --- a/chython/algorithms/tautomers/acid_base.py +++ b/chython/algorithms/tautomers/acid_base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -44,9 +44,8 @@ def neutralize(self: 'MoleculeContainer', *, keep_charge=True, logging=False, return [] return False - self._charges.update(mol._charges) - self._hydrogens.update(mol._hydrogens) - self.flush_cache() + self._atoms = mol._atoms + self.flush_cache(keep_sssr=True, keep_components=True) if _fix_stereo: self.fix_stereo() if logging: @@ -85,14 +84,16 @@ def enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: continue uniq.add(dc) seen_combo.add((dc, ac)) - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) for n in ac: - mol._hydrogens[n] += 1 - mol._charges[n] += 1 + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for n in dc: if n is not None: - mol._hydrogens[n] -= 1 - mol._charges[n] -= 1 + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 if mol not in seen: seen.add(mol) yield mol @@ -109,15 +110,17 @@ def enumerate_charged_forms(self: 'MoleculeContainer', *, deep: int = 4, limit: uniq.add(ac) if (dc, ac) in seen_combo: continue - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) for n in ac: if n is not None: - mol._hydrogens[n] += 1 - mol._charges[n] += 1 + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for n in dc: if n is not None: - mol._hydrogens[n] -= 1 - mol._charges[n] -= 1 + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 if mol not in seen: seen.add(mol) yield mol @@ -139,44 +142,52 @@ def _neutralize(self: 'MoleculeContainer', keep_charge=True): if not donors or not acceptors: return 
# neutralization impossible elif len(donors) > len(acceptors): - copy = self.copy() - for a in acceptors: - copy._hydrogens[a] += 1 - copy._charges[a] += 1 + copy = self.copy(keep_sssr=True, keep_components=True) + for n in acceptors: + a = copy._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 for c in combinations(donors, len(acceptors)): - mol = copy.copy() - for d in c: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 + mol = copy.copy(keep_sssr=True, keep_components=True) + for n in c: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 yield mol, acceptors.union(c) elif len(donors) < len(acceptors): - copy = self.copy() - for d in donors: - copy._hydrogens[d] -= 1 - copy._charges[d] -= 1 + copy = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = copy._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 for c in combinations(acceptors, len(donors)): - mol = copy.copy() - for a in c: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = copy.copy(keep_sssr=True, keep_components=True) + for n in c: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors.union(c) else: # balanced! 
- mol = self.copy() - for d in donors: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 - for a in acceptors: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 + for n in acceptors: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors | acceptors elif donors or acceptors: - mol = self.copy() - for d in donors: - mol._hydrogens[d] -= 1 - mol._charges[d] -= 1 - for a in acceptors: - mol._hydrogens[a] += 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + for n in donors: + a = mol._atoms[n] + a._implicit_hydrogens -= 1 + a._charge -= 1 + for n in acceptors: + a = mol._atoms[n] + a._implicit_hydrogens += 1 + a._charge += 1 yield mol, donors | acceptors def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): @@ -190,11 +201,13 @@ def _enumerate_zwitter_tautomers(self: 'MoleculeContainer'): acceptors.add(mapping[1]) for d, a in product(donors, acceptors): - mol = self.copy() - mol._hydrogens[d] -= 1 - mol._hydrogens[a] += 1 - mol._charges[d] -= 1 - mol._charges[a] += 1 + mol = self.copy(keep_sssr=True, keep_components=True) + d = mol._atoms[d] + a = mol._atoms[a] + d._implicit_hydrogens -= 1 + a._implicit_hydrogens += 1 + d._charge -= 1 + a._charge += 1 yield mol diff --git a/chython/algorithms/tautomers/heteroarenes.py b/chython/algorithms/tautomers/heteroarenes.py index 81837438..99a154f4 100644 --- a/chython/algorithms/tautomers/heteroarenes.py +++ b/chython/algorithms/tautomers/heteroarenes.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -27,20 +27,24 @@ from chython import MoleculeContainer +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 + + class HeteroArenes: __slots__ = () def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): atoms = self._atoms bonds = self._bonds - hydrogens = self._hydrogens - charges = self._charges - radicals = self._radicals rings = defaultdict(list) # aromatic skeleton for n, m_bond in bonds.items(): for m, bond in m_bond.items(): - if bond.order == 4: + if bond == 4: rings[n].append(m) if not rings: return @@ -49,19 +53,20 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): donors = set() single_bonded = set() for n, ms in rings.items(): + a = atoms[n] if len(ms) == 2: - if atoms[n].atomic_number in (5, 7, 15): - if not charges[n] and not radicals[n]: + if a in (B, N, P): + if not a.charge and not a.is_radical: # only neutral B, N, P - if hydrogens[n]: # pyrrole + if a.implicit_hydrogens: # pyrrole donors.add(n) elif len(bonds[n]) == 2: # pyridine acceptors.add(n) else: single_bonded.add(n) - elif charges[n] == -1 and atoms[n].atomic_number == 6: # ferrocene + elif a.charge == -1 and a == C: # ferrocene single_bonded.add(n) - elif len(ms) == 3 and atoms[n].atomic_number in (5, 7, 15) and not charges[n] and not radicals[n]: + elif len(ms) == 3 and a in (B, N, P) and not a.charge and not a.is_radical: single_bonded.add(n) if not donors or not acceptors: return @@ -94,9 +99,9 @@ def _enumerate_hetero_arene_tautomers(self: 'MoleculeContainer'): next(_kekule_component(component, sb, (), 0)) except InvalidAromaticRing: continue - mol = self.copy() - mol._hydrogens[d] = 0 - mol._hydrogens[a] = 1 + mol = self.copy(keep_sssr=True, keep_components=True) + mol._atoms[d]._implicit_hydrogens = 0 + mol._atoms[a]._implicit_hydrogens = 1 yield mol diff --git a/chython/algorithms/tautomers/keto_enol.py b/chython/algorithms/tautomers/keto_enol.py index acad2241..ba80f63b 100644 --- 
a/chython/algorithms/tautomers/keto_enol.py +++ b/chython/algorithms/tautomers/keto_enol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -27,6 +27,10 @@ from chython import MoleculeContainer +# atomic number constants +C = 6 + + class KetoEnol: __slots__ = () @@ -39,13 +43,17 @@ def _enumerate_keto_enol_tautomers(self: Union['MoleculeContainer', 'KetoEnol'], a = fix[0][0] d = fix[-1][1] - mol = self.copy() + mol = self.copy(keep_sssr=True, keep_components=True) m_bonds = mol._bonds for n, m, b in fix: - m_bonds[n][m]._Bond__order = b - - mol._hydrogens[a] += 1 - mol._hydrogens[d] -= 1 + m_bonds[n][m]._order = b + + a = mol._atoms[a] + d = mol._atoms[d] + a._implicit_hydrogens += 1 + d._implicit_hydrogens -= 1 + a._hybridization -= 1 # -C=X>=C-X or -C=C=X>=C-C=X + d._hybridization += 1 yield mol, ket @cached_property @@ -59,8 +67,6 @@ def _sugar_groups(self): def __enumerate_bonds(self: 'MoleculeContainer', partial): atoms = self._atoms bonds = self._bonds - hydrogens = self._hydrogens - hybridization = self.hybridization rings = self.atoms_rings_sizes # search neutral oxygen and nitrogen @@ -83,11 +89,12 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): if partial and path and not len(path) % 2 and \ (hydrogen or # enol > ketone - hydrogens[(x := path[-1][1])] and (x not in rings or all(x > 7 for x in rings[x]))): # ketone> + atoms[(x := path[-1][1])].implicit_hydrogens and + (x not in rings or all(x > 7 for x in rings[x]))): # ketone> # return partial hops. ignore allenes in small rings. yield path, hydrogen if len(path) > depth: # fork found - if not partial and not len(path) % 2 and (hydrogen or hydrogens[path[-1][1]]): + if not partial and not len(path) % 2 and (hydrogen or atoms[path[-1][1]].implicit_hydrogens): # end of path found. return it and start new one. 
yield path, hydrogen seen.difference_update(x for _, x, _ in path[depth:]) @@ -109,33 +116,32 @@ def __enumerate_bonds(self: 'MoleculeContainer', partial): elif n in seen: # aromatic ring destruction. pyridine double bonds shift continue elif n in anti: # enol-ketone switch - if current in anti[n]: - if hydrogens: - if b.order == 2: - cp = path.copy() - cp.append((current, n, 1)) - yield cp, True - elif b.order == 1: + if current in anti[n]: # keton or enol bond + if hydrogen: + cp = path.copy() + cp.append((current, n, 1)) # double to single in keton end + yield cp, True + else: cp = path.copy() - cp.append((current, n, 2)) + cp.append((current, n, 2)) # single to double in enol end yield cp, False - elif b.order == bond and atoms[n].atomic_number == 6: # classic keto-enol route - hb = hybridization(n) - if hb == 2: # grow up + elif b == bond and (a := atoms[n]) == C: # classic keto-enol route + if a.hybridization == 2: # grow up stack.append((current, n, next_bond, depth)) elif hydrogen: - if hb == 3: # OC=CC=C=C case + if a.hybridization == 3: # OC=CC=C=C case cp = path.copy() cp.append((current, n, 1)) yield cp, True # ketone found - elif hb == 1 and hydrogens[n]: # ketone >> enol + elif a.hybridization == 1 and a.implicit_hydrogens: # ketone >> enol cp = path.copy() cp.append((current, n, 2)) yield cp, False if path and not len(path) % 2 and \ (hydrogen or # enol > ketone - hydrogens[(x := path[-1][1])] and (x not in rings or all(x > 7 for x in rings[x]))): + atoms[(x := path[-1][1])].implicit_hydrogens and + (x not in rings or all(x > 7 for x in rings[x]))): yield path, hydrogen diff --git a/tests/algorithms/tautomers/__init__.py b/chython/algorithms/tautomers/test/__init__.py similarity index 100% rename from tests/algorithms/tautomers/__init__.py rename to chython/algorithms/tautomers/test/__init__.py diff --git a/tests/algorithms/tautomers/test_tautomers.py b/chython/algorithms/tautomers/test/test_tautomers.py similarity index 100% rename from 
tests/algorithms/tautomers/test_tautomers.py rename to chython/algorithms/tautomers/test/test_tautomers.py diff --git a/chython/algorithms/test/__init__.py b/chython/algorithms/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/algorithms/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# diff --git a/chython/algorithms/test/test_isomorphism.py b/chython/algorithms/test/test_isomorphism.py new file mode 100644 index 00000000..4cb2c141 --- /dev/null +++ b/chython/algorithms/test/test_isomorphism.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from chython import smiles + + +def test_basic(): + # Test basic atom mapping in simple molecules + mol1 = smiles('CC(=O)O') # acetic acid + mol2 = smiles('CC(O)=O') # acetic acid + + assert mol1 <= mol2 + assert mol2 <= mol1 + + mappings = list(mol1.get_mapping(mol2)) + assert len(mappings) == 1 + assert mappings[0] == {1: 1, 2: 2, 3: 4, 4: 3} + assert not smiles('CC(O)O') <= mol1 + assert smiles('C[O-]') <= smiles('CC[O-]') + assert not smiles('C[O-]') <= smiles('CCO') + + +def test_substructure_mapping(): + # Test mapping of a substructure + mol = smiles('CCC(=O)OC') + substructure = smiles('CC(=O)O') + + assert substructure < mol + mappings = list(substructure.get_mapping(mol)) + assert len(mappings) == 1 + assert mappings[0] == {1: 2, 2: 3, 3: 4, 4: 5} + + +def test_multiple_mappings(): + # Test cases where multiple valid mappings exist + mol = smiles('CC(=O)OC(=O)C') + pattern = smiles('CC(=O)O') # acetone pattern + + mappings = list(pattern.get_mapping(mol)) + assert len(mappings) == 2 # should find multiple matches + assert {1: 1, 2: 2, 3: 3, 4: 4} in mappings + assert {1: 7, 2: 5, 3: 6, 4: 4} in mappings diff --git a/chython/algorithms/test/test_smiles.py b/chython/algorithms/test/test_smiles.py new file mode 100644 index 00000000..b61606ae --- /dev/null +++ b/chython/algorithms/test/test_smiles.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from chython import smiles + + +def test_basic_smiles(): + # Test basic SMILES generation + mol = smiles('CO') # methanol + assert str(mol) in ('CO', 'OC') + + mol = smiles('c1ccccc1') # benzene + assert str(mol) == 'c1ccccc1' + + +def test_format_options(): + # Test different format options + mol = smiles('C=1C=CC=CC=1') + + # Test asymmetric closures + assert str(mol) == 'C=1C=CC=CC=1' + assert format(mol, 'a') == 'C=1C=CC=CC1' + + assert format(mol, '!b') == 'C1CCCCC1' + + # Test disable stereo + mol = smiles('C[C@H](O)CC') + assert '@' in str(mol) + assert '@' not in format(mol, '!s') + + mol = smiles('c1ccccc1') + assert format(mol, 'A') == 'C:1:C:C:C:C:C:1' + assert format(mol, 'Aa') == 'C:1:C:C:C:C:C1' + assert format(mol, 'm') == '[cH:1]1[cH:2][cH:3][cH:4][cH:5][cH:6]1' + assert format(mol, 'h') == '[cH]1[cH][cH][cH][cH][cH]1' + + assert format(mol, 'Ah') == '[CH]:1:[CH]:[CH]:[CH]:[CH]:[CH]:1' + assert format(mol, 'Ah!b') == '[CH]1[CH][CH][CH][CH][CH]1' + + mol = smiles('[K+]') + assert str(mol) == '[K+]' + assert format(mol, '!z') == '[K]' + + mol = smiles('[CH3]') + assert str(mol) == '[CH3] |^1:0|' + assert format(mol, '!x') == '[CH3]' + + mol = smiles('CCO') + assert len({format(mol, 'r') for _ in range(50)}) == 4 + + +def test_smiles_comparison(): + # Test SMILES comparison functionality + mol1 = smiles('CCO') + mol2 = smiles('OCC') + mol3 = smiles('CCC') + + assert mol1 == mol2 # same molecules + assert mol1 != mol3 # different molecules + assert hash(mol1) == hash(mol2) # same hash for same molecules diff --git a/chython/algorithms/x3dom.py b/chython/algorithms/x3dom.py index 
f5da216d..73779280 100644 --- a/chython/algorithms/x3dom.py +++ b/chython/algorithms/x3dom.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # Copyright 2020 Dinar Batyrshin # This file is part of chython. # @@ -141,7 +141,13 @@ def depict3d(self: Union['MoleculeContainer', 'X3domMolecule'], index: int = 0) :param index: index of conformer """ - xyz = self._conformers[index] + if not hasattr(self, '_conformers'): + raise ValueError('No conformers stored within structure') + try: + xyz = self._conformers[index] + except IndexError: + raise IndexError('Invalid conformer index') + mx = sum(x for x, _, _ in xyz.values()) / len(xyz) my = sum(y for _, y, _ in xyz.values()) / len(xyz) mz = sum(z for _, _, z in xyz.values()) / len(xyz) @@ -175,7 +181,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): atoms = [] if carbon: - for n, a in self._atoms.items(): + for n, a in self.atoms(): r = radius or a.atomic_radius * multiplier fr = r * 0.71 atoms.append(f" \n" @@ -191,7 +197,7 @@ def __render_atoms(self: 'MoleculeContainer', xyz): f" \n \n" " \n \n \n \n") else: - for n, a in self._atoms.items(): + for n, a in self.atoms(): r = radius or a.atomic_radius * multiplier atoms.append(f" \n" " \n \n" @@ -215,7 +221,6 @@ def __render_bonds(self: 'MoleculeContainer', xyz): doubles = {} half_triple = triple_space / 2 for n, m, bond in self.bonds(): - order = bond.order nx, ny, nz = xyz[n] mx, my, mz = xyz[m] @@ -227,13 +232,13 @@ def __render_bonds(self: 'MoleculeContainer', xyz): rotation_angle = acos(nmy / length) lengths[(n, m)] = lengths[(m, n)] = (length, rotation_angle) x, y, z = nx + nmx / 2, ny + nmy / 2, nz + nmz / 2 - if order in (1, 4): + if bond in (1, 4): xml.append(f" \n \n \n" f" \n \n" f" \n \n" " \n \n \n") - elif order == 2: + elif bond == 2: if n in doubles: # normal for plane n m o norm_x, norm_y, norm_z = plane_normal(nmx, nmy, nmz, *doubles[n]) @@ -280,7 +285,7 @@ def 
__render_bonds(self: 'MoleculeContainer', xyz): f" \n \n" f" \n \n" " \n \n \n") - elif order == 3: + elif bond == 3: nox, noy, noz = vector_normal(nmx, nmy, nmz) # normal for plane n m o diff --git a/chython/containers/__init__.py b/chython/containers/__init__.py index 6658eeaa..0f2f3dbb 100644 --- a/chython/containers/__init__.py +++ b/chython/containers/__init__.py @@ -36,7 +36,8 @@ def unpach(data: bytes, /, *, compressed=True) -> Union[MoleculeContainer, React return ReactionContainer.unpack(data, compressed=False) +unpack = unpach + + __all__ = [x for x in locals() if x.endswith('Container')] -__all__.append('Bond') -__all__.append('QueryBond') -__all__.append('unpach') +__all__.extend(['Bond', 'QueryBond', 'unpack', 'unpach']) diff --git a/chython/containers/_cpack.pyx b/chython/containers/_cpack.pyx deleted file mode 100644 index 9d4be647..00000000 --- a/chython/containers/_cpack.pyx +++ /dev/null @@ -1,211 +0,0 @@ -# -*- coding: utf-8 -*- -# cython: language_level=3 -# -# Copyright 2023 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -cimport cython -from cpython.mem cimport PyMem_Malloc, PyMem_Free - -from chython.containers.bonds import Bond - -# Format specification:: -# -# Big endian bytes order -# 8 bit - 0x03 (format specification version) -# Atom block 3 bytes (repeated): -# 1 bit - atom entrance flag (always 1) -# 7 bit - atomic number (<=118) -# 3 bit - hydrogens (0-7). Note: 7 == None -# 4 bit - charge (charge + 4. possible range -4 - 4) -# 1 bit - radical state -# 1 bit padding -# 3 bit tetrahedron/allene sign -# (000 - not stereo or unknown, 001 - pure-unknown-enantiomer, 010 or 011 - has stereo) -# 4 bit - number of following bonds and CT blocks (0-15) -# -# Bond block 2 bytes (repeated 0-15 times) -# 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) -# 4 bit - bond order: 0000 - single, 0001 - double, 0010 - triple, 0011 - aromatic, 0111 - special -# -# Cis-Trans 2 bytes -# 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) -# 4 bit - CT sign: 1000 or 1001 - to avoid overlap with bond - -@cython.nonecheck(False) -@cython.boundscheck(False) -@cython.cdivision(True) -@cython.wraparound(False) -def unpack(const unsigned char[::1] data not None): - cdef char *charges - cdef unsigned char *atoms, *hydrogens, *radicals, *is_chiral, *neighbors, **orders, *seen - cdef unsigned short **connections, *ct_stereo - cdef bint *stereo_sign, *ct_sign - - cdef unsigned char a, b, i - cdef unsigned short size, shift = 1, n, m, bond_shift, atoms_count, ct_count = 0, ct_shift = 0 - - cdef tuple py_xy - cdef object bond, py_n, py_m - cdef list py_mapping, py_atoms, py_isotopes, py_bonds_flat - cdef dict py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, py_ngb - cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo - - # allocate memory - size = len(data) - atoms = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - charges = PyMem_Malloc(size / 3 * sizeof(char)) - radicals = 
PyMem_Malloc(size / 3 * sizeof(unsigned char)) - hydrogens = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - is_chiral = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - stereo_sign = PyMem_Malloc(size / 3 * sizeof(bint)) - ct_stereo = PyMem_Malloc(size / 3 * sizeof(unsigned short)) - ct_sign = PyMem_Malloc(size / 6 * sizeof(bint)) - seen = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - neighbors = PyMem_Malloc(size / 3 * sizeof(unsigned char)) - connections = PyMem_Malloc(size / 3 * sizeof(unsigned short*)) - orders = PyMem_Malloc(size / 3 * sizeof(unsigned char *)) - for n in range(size / 3): - connections[n] = PyMem_Malloc(15 * sizeof(unsigned short)) - orders[n] = PyMem_Malloc(15 * sizeof(unsigned char)) - - # unpack atom block to separate attributes arrays - n = 0 - while shift < size: - seen[n] = 0 # erase randomness - a = data[shift] - if a & 0x80 == 0: # end of pack - break - atoms[n] = a & 0x7f - - a = data[shift + 1] - hydrogens[n] = a >> 5 - charges[n] = ((a >> 1) & 0x0f) - 4 - radicals[n] = a & 0x01 - - a = data[shift + 2] - bond_shift = a & 0x0f - b = a >> 4 - if b == 0b0011: - is_chiral[n] = 1 - stereo_sign[n] = True - elif b == 0b0010: - is_chiral[n] = 1 - stereo_sign[n] = False - else: - is_chiral[n] = 0 - - shift += 3 - neighbors[n] = 0 - for i in range(bond_shift): - a, b = data[shift], data[shift + 1] - shift += 2 - - m = n - (a << 4 | b >> 4) # second atom index - b &= 0x0f - - if b < 8: - connections[n][neighbors[n]] = m - connections[m][neighbors[m]] = n - orders[m][neighbors[m]] = b + 1 # only single direction - neighbors[n] += 1 - neighbors[m] += 1 - else: # CT stereo - ct_stereo[ct_shift] = m + 1 - ct_stereo[ct_shift + 1] = n + 1 - ct_sign[ct_count] = b & 0x01 - ct_count += 1 - ct_shift += 2 - n += 1 - atoms_count = n - - # define returned data - py_mapping = [] - py_atoms = [] - py_isotopes = [] - py_charges = {} - py_radicals = {} - py_hydrogens = {} - py_plane = {} - py_atoms_stereo = {} - py_allenes_stereo = {} - py_cis_trans_stereo 
= {} - py_bonds = {} - py_bonds_flat = [] - py_xy = (0., 0.) - - for n in range(atoms_count): - seen[n] = 1 - py_n = n + 1 # shared py int obj - - # fill intermediate data - py_mapping.append(py_n) - py_atoms.append(atoms[n]) - py_isotopes.append(None) - - py_charges[py_n] = charges[n] - py_radicals[py_n] = bool(radicals[n]) - if hydrogens[n] == 7: - py_hydrogens[py_n] = None - else: - py_hydrogens[py_n] = hydrogens[n] - - py_plane[py_n] = py_xy - - if is_chiral[n]: - if neighbors[n] == 2: # allene - py_allenes_stereo[py_n] = stereo_sign[n] - else: - py_atoms_stereo[py_n] = stereo_sign[n] - - py_bonds[py_n] = py_ngb = {} - for i in range(neighbors[n]): - m = connections[n][i] - py_m = m + 1 - if seen[m]: # bond partially exists. need back-connection. - py_ngb[py_m] = py_bonds[py_m][py_n] - else: - bond = object.__new__(Bond) - bond._Bond__order = orders[n][i] - bond._Bond__n = py_n - bond._Bond__m = py_m - py_ngb[py_m] = bond - py_bonds_flat.append(bond) - - ct_shift = 0 - for n in range(ct_count): - py_cis_trans_stereo[(ct_stereo[ct_shift], ct_stereo[ct_shift + 1])] = ct_sign[n] - ct_shift += 2 - - PyMem_Free(atoms) - PyMem_Free(charges) - PyMem_Free(radicals) - PyMem_Free(hydrogens) - PyMem_Free(is_chiral) - PyMem_Free(stereo_sign) - PyMem_Free(ct_stereo) - PyMem_Free(ct_sign) - PyMem_Free(neighbors) - PyMem_Free(seen) - for n in range(size / 3): - PyMem_Free(connections[n]) - PyMem_Free(orders[n]) - PyMem_Free(connections) - PyMem_Free(orders) - - return (py_mapping, py_atoms, py_isotopes, - py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, - py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo, shift, py_bonds_flat) diff --git a/chython/containers/_pack.pyx b/chython/containers/_pack_v2.pyx similarity index 73% rename from chython/containers/_pack.pyx rename to chython/containers/_pack_v2.pyx index fa61afc0..f216d299 100644 --- a/chython/containers/_pack.pyx +++ b/chython/containers/_pack_v2.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 
2022 Ramil Nugmanov +# Copyright 2022-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,8 +19,9 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp, frexp +from libc.string cimport memset -# Format specification:: +# Format V2 specification:: # # Big endian bytes order # 8 bit - 0x02 (current format specification) @@ -48,40 +49,33 @@ from libc.math cimport ldexp, frexp # 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. # 1 bit - sign + @cython.nonecheck(False) @cython.boundscheck(False) @cython.cdivision(True) @cython.wraparound(False) def pack(object molecule): - cdef bint b # binary flag + cdef bint b = True # binary flag cdef char charge - cdef unsigned char atomic_number, isotope, bond, s = 0, buffer_b, buffer_o - cdef unsigned char *p, *data - cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m + cdef unsigned char atomic_number, ngb_count, isotope, bond, s = 0, buffer_b, buffer_o, stereo, hcr + cdef unsigned char *data + cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, n, m, tn, tm cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift # can be > 2^16 - cdef unsigned char[4096] stereo, hcr, seen - cdef unsigned int[4096] xy # 2 * 16bit + cdef unsigned char[4096] seen # atom number is 12 bit, thus, can be any value up to 4095. 
numbers are not continuous cdef bytes py_pack - cdef dict py_ngb, py_atoms, py_bonds, py_charges, py_radicals, py_hydrogens, py_plane - cdef dict py_cis_trans_stereo, py_atoms_stereo, py_allenes_stereo + cdef dict py_ngb, py_atoms, py_bonds, py_stereo cdef tuple py_tuple cdef object py_atom, py_bond, py_nan_int, py_obj # map molecule to vars py_atoms = molecule._atoms py_bonds = molecule._bonds - py_charges = molecule._charges - py_radicals = molecule._radicals - py_hydrogens = molecule._hydrogens - py_cis_trans_stereo = molecule._cis_trans_stereo - py_atoms_stereo = molecule._atoms_stereo - py_allenes_stereo = molecule._allenes_stereo - py_plane = molecule._plane + py_stereo = molecule._stereo_cis_trans_terminals # calculate elements count atoms_count = len(py_atoms) - cis_trans_count = len(py_cis_trans_stereo) + cis_trans_count = molecule._cis_trans_count for py_ngb in py_bonds.values(): bonds_count += len(py_ngb) @@ -103,35 +97,7 @@ def pack(object molecule): if not data: raise MemoryError() - # precalculate atom attrs - # should be done independently, due to possible randomness in dicts order. - # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical - for n, py_nan_int in py_hydrogens.items(): - if py_nan_int is None: - hcr[n] = 0xe0 # 0b11100000 - else: - hcr[n] = py_nan_int << 5 - for n, charge in py_charges.items(): - hcr[n] |= (charge + 4) << 1 - for n, b in py_radicals.items(): - if b: # lazy memory access - hcr[n] |= 1 - - # 2 float16 big endian - for n, py_tuple in py_plane.items(): - p = &xy[n] - double_to_float16(py_tuple[0], &p[0]) - double_to_float16(py_tuple[1], &p[2]) - - # erase random data - seen[n] = 0 - stereo[n] = 0 - - # 2 bit tetrahedron | 2 bit allene | 0000 - for n, b in py_atoms_stereo.items(): - stereo[n] = 0xc0 if b else 0x80 - for n, b in py_allenes_stereo.items(): - stereo[n] = 0x30 if b else 0x20 + memset(seen, 0, 4096 * sizeof(unsigned char)) # erase random data # start pack collection data[0] = 2 # header. 
specification version 2 @@ -139,28 +105,59 @@ def pack(object molecule): data[2] = atoms_count << 4 | cis_trans_count >> 8 # 1-4b of atom count value, 9-12b of cis-trans count value data[3] = cis_trans_count # 1-8b of cis-trans count value - b = True # init connection table flag for py_obj, py_atom in py_atoms.items(): py_ngb = py_bonds[py_obj] + ngb_count = len(py_ngb) n = py_obj # cast to C seen[n] = 1 - p = &xy[n] # XY atomic_number = py_atom.atomic_number - py_nan_int = py_atom._Core__isotope # direct access + + py_nan_int = py_atom._isotope # direct access if py_nan_int is None: isotope = 0 else: isotope = py_nan_int - common_isotopes[atomic_number] + py_nan_int = py_atom._stereo + if py_nan_int is None: + stereo = 0 + # V2 specification + # 2 bit tetrahedron | 2 bit allene | 0000 + elif py_nan_int: + if ngb_count == 2: # allene + stereo = 0x30 + else: + stereo = 0xc0 + else: + if ngb_count == 2: # allene + stereo = 0x20 + else: + stereo = 0x80 + + # precalculate atom attrs + # should be done independently, due to possible randomness in dicts order. 
+ # 3 bit - hydrogens (0-7) | 4 bit - charge | 1 bit - radical + py_nan_int = py_atom._implicit_hydrogens + if py_nan_int is None: + hcr = 0xe0 # 0b11100000 + else: + hcr = py_nan_int << 5 + + charge = py_atom._charge + hcr |= (charge + 4) << 1 + if py_atom._is_radical: + hcr |= 1 + data[atoms_shift] = n >> 4 # 5-12b AN - data[atoms_shift + 1] = n << 4 | len(py_ngb) # 1-4b AN, 4b NC - data[atoms_shift + 2] = stereo[n] | isotope >> 1 # TS , AS , 4b I + data[atoms_shift + 1] = n << 4 | ngb_count # 1-4b AN, 4b NC + data[atoms_shift + 2] = stereo | isotope >> 1 # TS , AS , 4b I data[atoms_shift + 3] = isotope << 7 | atomic_number # 1bI , A - data[atoms_shift + 4] = p[0] - data[atoms_shift + 5] = p[1] - data[atoms_shift + 6] = p[2] - data[atoms_shift + 7] = p[3] - data[atoms_shift + 8] = hcr[n] + + # 2 float16 big endian + double_to_float16(py_atom.x, &data[atoms_shift + 4]) + double_to_float16(py_atom.y, &data[atoms_shift + 6]) + + data[atoms_shift + 8] = hcr atoms_shift += 9 # collect connection table @@ -178,7 +175,7 @@ def pack(object molecule): b = True if not seen[m]: - bond = py_bond._Bond__order - 1 + bond = py_bond._order - 1 # 3 3 2 | 1 3 3 1 | 2 3 3 if s == 0: buffer_o = bond << 5 @@ -210,17 +207,19 @@ def pack(object molecule): order_shift += 1 s = 0 + py_nan_int = py_bond._stereo + if py_nan_int is not None: + py_tuple = py_stereo[py_obj] + tn, tm = py_tuple + data[cis_trans_shift] = tn >> 4 + data[cis_trans_shift + 1] = tn << 4 | tm >> 8 + data[cis_trans_shift + 2] = tm + data[cis_trans_shift + 3] = py_nan_int + cis_trans_shift += 4 + if s: # flush buffer data[order_shift] = buffer_o - for py_tuple, b in py_cis_trans_stereo.items(): - n, m = py_tuple - data[cis_trans_shift] = n >> 4 - data[cis_trans_shift + 1] = n << 4 | m >> 8 - data[cis_trans_shift + 2] = m - data[cis_trans_shift + 3] = b - cis_trans_shift += 4 - try: py_pack = data[:size] finally: diff --git a/chython/containers/_unpack.pyx b/chython/containers/_unpack_v0v2.pyx similarity index 57% 
rename from chython/containers/_unpack.pyx rename to chython/containers/_unpack_v0v2.pyx index 670f1f7b..a4903ad8 100644 --- a/chython/containers/_unpack.pyx +++ b/chython/containers/_unpack_v0v2.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: language_level=3 # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,16 @@ cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.math cimport ldexp +from chython.containers import MoleculeContainer from chython.containers.bonds import Bond +from chython.periodictable import (H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, + Mn, Fe, Co, Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, + Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, + Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, Tl, Pb, Bi, Po, At, Rn, Fr, + Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, Bh, Hs, + Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og) +from chython.periodictable.base.vector import Vector + # Format specification:: # @@ -56,21 +65,17 @@ from chython.containers.bonds import Bond @cython.cdivision(True) @cython.wraparound(False) def unpack(const unsigned char[::1] data not None): - cdef char *charges - cdef unsigned char a, b, c, d, isotope, atomic_number, neighbors_count, s = 0, nc, version - cdef unsigned char *atoms, *hydrogens, *neighbors, *orders, *is_tet, *is_all - cdef bint *stereo_sign, *ct_sign, *radicals + cdef unsigned char a, b, c, d, isotope, atomic_number, neighbors_count, s = 0, version, stereo, hydrogens + cdef unsigned char *neighbors, *orders cdef unsigned short atoms_count, bonds_count = 0, cis_trans_count, order_count cdef unsigned short i, j, k = 0, n, m, buffer_b, shift = 0 - cdef unsigned short *mapping, *isotopes, *cis_trans_1, *cis_trans_2, 
*connections + cdef unsigned short *mapping, *connections cdef unsigned int size, atoms_shift = 4, bonds_shift, order_shift, cis_trans_shift - cdef double *x_coord, *y_coord cdef unsigned char[4096] seen - cdef object bond, py_n, py_m - cdef dict py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, py_ngb - cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo - cdef list py_mapping, py_atoms, py_isotopes, py_bonds_flat + cdef object py_mol, py_bond, py_n, py_m, py_atom, py_nan_bool, py_vector + cdef dict py_atoms, py_bonds, py_ngb + cdef list py_cis_trans # read header version = data[0] @@ -79,24 +84,17 @@ def unpack(const unsigned char[::1] data not None): cis_trans_count = (b & 0x0f) << 8 | c # allocate memory - charges = PyMem_Malloc(atoms_count * sizeof(char)) - radicals = PyMem_Malloc(atoms_count * sizeof(bint)) - atoms = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - hydrogens = PyMem_Malloc(atoms_count * sizeof(unsigned char)) neighbors = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - is_tet = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - is_all = PyMem_Malloc(atoms_count * sizeof(unsigned char)) - stereo_sign = PyMem_Malloc(atoms_count * sizeof(bint)) mapping = PyMem_Malloc(atoms_count * sizeof(unsigned short)) - isotopes = PyMem_Malloc(atoms_count * sizeof(unsigned short)) - x_coord = PyMem_Malloc(atoms_count * sizeof(double)) - y_coord = PyMem_Malloc(atoms_count * sizeof(double)) - if not charges or not radicals or not atoms or not hydrogens or not neighbors or not is_tet or not is_all: - raise MemoryError() - if not stereo_sign or not mapping or not isotopes or not x_coord or not y_coord: + if not neighbors or not mapping: raise MemoryError() + py_mol = MoleculeContainer() + py_atoms = py_mol._atoms + py_bonds = py_mol._bonds + py_cis_trans = [] + # unpack atom block to separate attributes arrays for i in range(atoms_count): a, b = data[atoms_shift], data[atoms_shift + 1] @@ -106,34 +104,49 @@ def unpack(const unsigned 
char[::1] data not None): bonds_count += neighbors_count a, b = data[atoms_shift + 2], data[atoms_shift + 3] - if a >> 7: # tetrahedron bit set - is_tet[i] = 1 - is_all[i] = 0 - stereo_sign[i] = a & 0x40 # mask th bit - else: - is_tet[i] = 0 - if a >> 5: # allene bit set - is_all[i] = 1 - stereo_sign[i] = a & 0x10 # mask al bit - else: - is_all[i] = 0 + stereo = a >> 4 + if stereo == 0: + py_nan_bool = None + elif stereo == 0b0010: + py_nan_bool = False + elif stereo == 0b0011: + py_nan_bool = True + elif stereo == 0b1000: + py_nan_bool = False + else: # if stereo == 0b1100: + py_nan_bool = True + + atomic_number = b & 0x7f + py_atom = object.__new__(elements[atomic_number]) + py_atoms[n] = py_atom + + py_atom._stereo = py_nan_bool - atoms[i] = atomic_number = b & 0x7f isotope = (a & 0x0f) << 1 | b >> 7 if isotope: - isotopes[i] = common_isotopes[atomic_number] + isotope + py_atom._isotope = common_isotopes[atomic_number] + isotope else: - isotopes[i] = 0 + py_atom._isotope = None + py_vector = object.__new__(Vector) a, b = data[atoms_shift + 4], data[atoms_shift + 5] - x_coord[i] = double_from_bytes(a, b) + py_vector.x = double_from_bytes(a, b) a, b = data[atoms_shift + 6], data[atoms_shift + 7] - y_coord[i] = double_from_bytes(a, b) + py_vector.y = double_from_bytes(a, b) + py_atom._xy = py_vector a = data[atoms_shift + 8] - hydrogens[i] = a >> 5 - charges[i] = ((a >> 1) & 0x0f) - 4 - radicals[i] = a & 0x01 + hydrogens = a >> 5 + if hydrogens == 7: + py_atom._implicit_hydrogens = None + else: + py_atom._implicit_hydrogens = hydrogens + + py_atom._charge = ((a >> 1) & 0x0f) - 4 + if a & 0x01: + py_atom._is_radical = True + else: + py_atom._is_radical = False atoms_shift += 9 # calculate bonds count and pack sections @@ -145,7 +158,7 @@ def unpack(const unsigned char[::1] data not None): order_count = order_count / 8 + 1 else: order_count /= 8 - elif version == 0: + else: # if version == 0: order_count = bonds_count / 5 if bonds_count % 5: order_count += 1 @@ 
-193,7 +206,7 @@ def unpack(const unsigned char[::1] data not None): buffer_b = (a & 0x3) << 1 s = 1 i += 2 - elif version == 0: + else: # if version == 0: for j in range(order_shift, cis_trans_shift, 2): # 0 3 3 1 | 2 3 3 a, b = data[j], data[j + 1] @@ -204,102 +217,45 @@ def unpack(const unsigned char[::1] data not None): orders[i + 4] = b & 0x7 i += 5 - if cis_trans_count: - cis_trans_1 = PyMem_Malloc(cis_trans_count * sizeof(unsigned short)) - cis_trans_2 = PyMem_Malloc(cis_trans_count * sizeof(unsigned short)) - ct_sign = PyMem_Malloc(cis_trans_count * sizeof(bint)) - if not cis_trans_1 or not cis_trans_2 or not ct_sign: - raise MemoryError() - - for i in range(cis_trans_count): - a, b = data[cis_trans_shift], data[cis_trans_shift + 1] - c, d = data[cis_trans_shift + 2], data[cis_trans_shift + 3] - cis_trans_1[i] = a << 4 | b >> 4 - cis_trans_2[i] = (b & 0x0f) << 8 | c - ct_sign[i] = d # d = 0x01 or 0x00 - cis_trans_shift += 4 - - # define returned data - py_mapping = [] - py_atoms = [] - py_isotopes = [] - py_charges = {} - py_radicals = {} - py_hydrogens = {} - py_plane = {} - py_atoms_stereo = {} - py_allenes_stereo = {} - py_cis_trans_stereo = {} - py_bonds = {} - py_bonds_flat = [] + for i in range(atoms_count): + n = mapping[i] + py_n = n # shared py int obj - for i in range(atoms_count): - n = mapping[i] - py_n = n # shared py int obj - - # fill intermediate data - py_mapping.append(py_n) - py_atoms.append(atoms[i]) - py_isotopes.append(isotopes[i] or None) - - py_charges[py_n] = charges[i] - py_radicals[py_n] = radicals[i] - if hydrogens[i] == 7: - py_hydrogens[py_n] = None - else: - py_hydrogens[py_n] = hydrogens[i] + py_bonds[py_n] = py_ngb = {} + seen[n] = 1 - py_plane[py_n] = (x_coord[i], y_coord[i]) - - if is_tet[i]: - py_atoms_stereo[py_n] = stereo_sign[i] - elif is_all[i]: - py_allenes_stereo[py_n] = stereo_sign[i] + neighbors_count = neighbors[i] + for j in range(shift, shift + neighbors_count): + m = connections[j] + py_m = m + if seen[m]: # 
bond partially exists. need back-connection. + py_ngb[py_m] = py_bonds[py_m][py_n] + else: + py_bond = object.__new__(Bond) + py_bond._order = orders[k] + 1 + py_bond._stereo = None + py_ngb[py_m] = py_bond + k += 1 + shift += neighbors_count - py_bonds[py_n] = py_ngb = {} - seen[n] = 1 + PyMem_Free(orders) + PyMem_Free(connections) - nc = neighbors[i] - for j in range(shift, shift + nc): - m = connections[j] - py_m = m - if seen[m]: # bond partially exists. need back-connection. - py_ngb[py_m] = py_bonds[py_m][py_n] + if cis_trans_count: + for i in range(cis_trans_count): + a, b = data[cis_trans_shift], data[cis_trans_shift + 1] + c, d = data[cis_trans_shift + 2], data[cis_trans_shift + 3] + py_n = a << 4 | b >> 4 + py_m = (b & 0x0f) << 8 | c + if d: + py_cis_trans.append((py_n, py_m, True)) else: - bond = object.__new__(Bond) - bond._Bond__order = orders[k] + 1 - bond._Bond__n = py_n - bond._Bond__m = py_m - py_ngb[py_m] = bond - py_bonds_flat.append(bond) - k += 1 - shift += nc - - for i in range(cis_trans_count): - py_cis_trans_stereo[(cis_trans_1[i], cis_trans_2[i])] = ct_sign[i] - - PyMem_Free(charges) - PyMem_Free(radicals) - PyMem_Free(atoms) - PyMem_Free(hydrogens) + py_cis_trans.append((py_n, py_m, False)) + cis_trans_shift += 4 + PyMem_Free(neighbors) - PyMem_Free(is_tet) - PyMem_Free(is_all) - PyMem_Free(stereo_sign) PyMem_Free(mapping) - PyMem_Free(isotopes) - PyMem_Free(x_coord) - PyMem_Free(y_coord) - if bonds_count: - PyMem_Free(connections) - PyMem_Free(orders) - if cis_trans_count: - PyMem_Free(cis_trans_1) - PyMem_Free(cis_trans_2) - PyMem_Free(ct_sign) - return (py_mapping, py_atoms, py_isotopes, - py_charges, py_radicals, py_hydrogens, py_plane, py_bonds, - py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo, size, py_bonds_flat) + return py_mol, py_cis_trans, size cdef short[119] common_isotopes @@ -311,6 +267,13 @@ common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15 222, 221, 228, 227, 231, 231, 235, 236, 241, 
242, 243, 244, 245, 254, 253, 254, 254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278] +cdef list elements +elements = [None, H, He, Li, Be, B, C, N, O, F, Ne, Na, Mg, Al, Si, P, S, Cl, Ar, K, Ca, Sc, Ti, V, Cr, Mn, Fe, Co, + Ni, Cu, Zn, Ga, Ge, As, Se, Br, Kr, Rb, Sr, Y, Zr, Nb, Mo, Tc, Ru, Rh, Pd, Ag, Cd, In, Sn, Sb, Te, I, Xe, + Cs, Ba, La, Ce, Pr, Nd, Pm, Sm, Eu, Gd, Tb, Dy, Ho, Er, Tm, Yb, Lu, Hf, Ta, W, Re, Os, Ir, Pt, Au, Hg, + Tl, Pb, Bi, Po, At, Rn, Fr, Ra, Ac, Th, Pa, U, Np, Pu, Am, Cm, Bk, Cf, Es, Fm, Md, No, Lr, Rf, Db, Sg, + Bh, Hs, Mt, Ds, Rg, Cn, Nh, Fl, Mc, Lv, Ts, Og] + cdef double double_from_bytes(unsigned char a, unsigned char b): cdef bint sign diff --git a/chython/containers/bonds.py b/chython/containers/bonds.py index cb61af29..727b15d7 100644 --- a/chython/containers/bonds.py +++ b/chython/containers/bonds.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,94 +17,72 @@ # along with this program; if not, see . 
# from typing import Optional, Tuple, Union, List, Set -from weakref import ref -from ..exceptions import IsConnectedBond, IsNotConnectedBond class Bond: - __slots__ = ('__order', '__graph', '__n', '__m') + __slots__ = ('_order', '_in_ring', '_stereo') - def __init__(self, order: int): + def __init__(self, order: int, *, stereo: Optional[bool] = None): if not isinstance(order, int): raise TypeError('invalid order value') elif order not in (1, 4, 2, 3, 8): raise ValueError('order should be from [1, 2, 3, 4, 8]') - self.__order = order + self._order = order + self._stereo = stereo def __eq__(self, other): - if isinstance(other, Bond): - return self.__order == other.order - elif isinstance(other, int): - return self.__order == other + if isinstance(other, int): + return self.order == other + elif isinstance(other, Bond): + return self.order == other.order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order})' + return f'{self.__class__.__name__}({self.order})' def __int__(self): """ Bond order. """ - return self.__order + return self.order def __hash__(self): """ Bond order. Used in Morgan atoms ordering. 
""" - return self.__order - - def __getstate__(self): - return {'order': self.__order} - - def __setstate__(self, state): - self.__order = state['order'] + return self.order @property def order(self) -> int: - return self.__order + return self._order + + @property + def stereo(self) -> Optional[bool]: + return self._stereo @property def in_ring(self) -> bool: - try: - return self.__graph().is_ring_bond(self.__n, self.__m) - except AttributeError: - raise IsNotConnectedBond + return self._in_ring - def copy(self) -> 'Bond': + def copy(self, full=False, stereo=False) -> 'Bond': copy = object.__new__(self.__class__) - copy._Bond__order = self.__order - return copy - - @classmethod - def from_bond(cls, bond): - if isinstance(bond, cls): - copy = object.__new__(cls) - copy._Bond__order = bond.order - return copy - raise TypeError('Bond expected') - - def _attach_graph(self, graph, n, m): - try: - self.__graph - except AttributeError: - self.__graph = ref(graph) - self.__n = n - self.__m = m + copy._order = self.order + if full: + copy._stereo = self.stereo + copy._in_ring = self.in_ring else: - raise IsConnectedBond + if stereo: + copy._stereo = self.stereo + else: + copy._stereo = None + return copy - def _change_map(self, n, m): - try: - self.__graph - except AttributeError: - raise IsNotConnectedBond - else: - self.__n = n - self.__m = m + def __copy__(self): + return self.copy() class DynamicBond: - __slots__ = ('__order', '__p_order') + __slots__ = ('_order', '_p_order') def __init__(self, order=None, p_order=None): if order is None: @@ -118,16 +96,16 @@ def __init__(self, order=None, p_order=None): if order not in (1, 4, 2, 3, None, 8) or p_order not in (1, 4, 2, 3, None, 8): raise ValueError('order or p_order should be from [1, 2, 3, 4, 8]') - self.__order = order - self.__p_order = p_order + self._order = order + self._p_order = p_order def __eq__(self, other): if isinstance(other, DynamicBond): - return self.__order == other.order and self.__p_order == 
other.p_order + return self.order == other.order and self.p_order == other.p_order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order}, {self.__p_order})' + return f'{self.__class__.__name__}({self.order}, {self.p_order})' def __int__(self): """ @@ -139,47 +117,46 @@ def __hash__(self): """ Hash of bond orders. """ - return hash((self.__order or 0, self.__p_order or 0)) + return hash((self.order or 0, self.p_order or 0)) @property def is_dynamic(self) -> bool: """ Bond has dynamic features """ - return self.__order != self.__p_order + return self.order != self.p_order @property def order(self) -> Optional[int]: - return self.__order + return self._order @property def p_order(self) -> Optional[int]: - return self.__p_order + return self._p_order def copy(self) -> 'DynamicBond': copy = object.__new__(self.__class__) - copy._DynamicBond__order = self.__order - copy._DynamicBond__p_order = self.__p_order + copy._order = self.order + copy._p_order = self.p_order return copy + def __copy__(self): + return self.copy() + @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - copy = object.__new__(cls) - copy._DynamicBond__order = copy._DynamicBond__p_order = bond.order - return copy - elif isinstance(bond, cls): - copy = object.__new__(cls) - copy._DynamicBond__order = bond.order - copy._DynamicBond__p_order = bond.p_order - return copy - raise TypeError('DynamicBond expected') + def from_bond(cls, bond: 'Bond') -> 'DynamicBond': + if not isinstance(bond, Bond): + raise TypeError('Bond expected') + copy = object.__new__(cls) + copy._order = copy._p_order = bond.order + return copy class QueryBond: - __slots__ = ('__order', '__in_ring') + __slots__ = ('_order', '_in_ring', '_stereo') - def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], in_ring: Optional[bool] = None): + def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], + in_ring: Optional[bool] = None, stereo: Optional[bool] 
= None): if isinstance(order, (list, tuple, set)): if not all(isinstance(x, int) for x in order): raise TypeError('invalid order value') @@ -194,65 +171,85 @@ def __init__(self, order: Union[int, List[int], Set[int], Tuple[int, ...]], in_r raise TypeError('invalid order value') if in_ring is not None and not isinstance(in_ring, bool): raise TypeError('in_ring mark should be boolean or None') - self.__order = order - self.__in_ring = in_ring + self._order = order + self._in_ring = in_ring + self.stereo = stereo def __eq__(self, other): if isinstance(other, Bond): - if self.__in_ring is not None: - if self.__in_ring != other.in_ring: + if self.in_ring is not None: + if self.in_ring != other.in_ring: return False - return other.order in self.__order + return other.order in self.order elif isinstance(other, QueryBond): - return self.__order == other.order and self.__in_ring == other.in_ring + return self.order == other.order and self.in_ring == other.in_ring elif isinstance(other, int): - return other in self.__order + return other in self.order return False def __repr__(self): - return f'{self.__class__.__name__}({self.__order}, {self.__in_ring})' + return f'{self.__class__.__name__}({self.order}, {self.in_ring})' def __int__(self): """ Simple bond order or hash of sorted tuple of orders. """ - if len(self.__order) == 1: - return self.__order[0] - return hash(self.__order) + if len(self.order) == 1: + return self.order[0] + return hash(self.order) def __hash__(self): """ Hash of orders and cycle mark. Used in Morgan atoms ordering. 
""" - return hash((self.__order, self.__in_ring)) + return hash((self.order, self.in_ring)) @property def order(self) -> Tuple[int, ...]: - return self.__order + return self._order @property def in_ring(self) -> Optional[bool]: - return self.__in_ring + return self._in_ring + + @property + def stereo(self) -> Optional[bool]: + return self._stereo + + @stereo.setter + def stereo(self, value): + if value is not None and not isinstance(value, bool): + raise TypeError('stereo mark should be boolean or None') + self._stereo = value - def copy(self) -> 'QueryBond': + def copy(self, full=False) -> 'QueryBond': copy = object.__new__(self.__class__) - copy._QueryBond__order = self.__order - copy._QueryBond__in_ring = self.__in_ring + copy._order = self.order + if full: + copy._in_ring = self.in_ring + copy._stereo = self.stereo + else: + copy._in_ring = copy._stereo = None return copy + def __copy__(self): + return self.copy() + @classmethod - def from_bond(cls, bond): - if isinstance(bond, Bond): - copy = object.__new__(cls) - copy._QueryBond__order = (bond.order,) - copy._QueryBond__in_ring = None - return copy - elif isinstance(bond, cls): - copy = object.__new__(cls) - copy._QueryBond__order = bond.order - copy._QueryBond__in_ring = bond.in_ring - return copy - raise TypeError('QueryBond or Bond expected') + def from_bond(cls, bond: 'Bond', stereo=False, in_ring=False) -> 'QueryBond': + if not isinstance(bond, Bond): + raise TypeError('Bond expected') + copy = object.__new__(cls) + copy._order = (bond.order,) + if in_ring: + copy._in_ring = bond.in_ring + else: + copy._in_ring = None + if stereo: + copy._stereo = bond.stereo + else: + copy._stereo = None + return copy __all__ = ['Bond', 'DynamicBond', 'QueryBond'] diff --git a/chython/containers/cgr.py b/chython/containers/cgr.py index 24959c80..9bdc697d 100644 --- a/chython/containers/cgr.py +++ b/chython/containers/cgr.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 
2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -28,21 +28,13 @@ class CGRContainer(CGRSmiles, Morgan, Rings, Isomorphism, FingerprintsCGR): - __slots__ = ('_atoms', '_bonds', '_charges', '_radicals', '_p_charges', '_p_radicals', '__dict__', '__weakref__') + __slots__ = ('_atoms', '_bonds', '__dict__') _atoms: Dict[int, DynamicElement] _bonds: Dict[int, Dict[int, DynamicBond]] - _charges: Dict[int, int] - _radicals: Dict[int, bool] - _p_charges: Dict[int, int] - _p_radicals: Dict[int, bool] def __init__(self): self._atoms = {} self._bonds = {} - self._charges = {} - self._radicals = {} - self._p_charges = {} - self._p_radicals = {} def bonds(self) -> Iterator[Tuple[int, int, DynamicBond]]: """ @@ -59,19 +51,8 @@ def bonds(self) -> Iterator[Tuple[int, int, DynamicBond]]: def center_atoms(self) -> Tuple[int, ...]: """ Get list of atoms of reaction center (atoms with dynamic: bonds, charges, radicals). """ - radicals = self._radicals - p_charges = self._p_charges - p_radicals = self._p_radicals - - center = set() - for n, c in self._charges.items(): - if c != p_charges[n] or radicals[n] != p_radicals[n]: - center.add(n) - - for n, m_bond in self._bonds.items(): - if any(bond.order != bond.p_order for bond in m_bond.values()): - center.add(n) - + center = {n for n, a in self._atoms.items() if a.is_dynamic} + center.update(n for n, m_bond in self._bonds.items() if any(bond.is_dynamic for bond in m_bond.values())) return tuple(center) def substructure(self, atoms) -> 'CGRContainer': @@ -82,22 +63,10 @@ def substructure(self, atoms) -> 'CGRContainer': """ atoms = set(atoms) sa = self._atoms - sc = self._charges - sr = self._radicals sb = self._bonds - spc = self._p_charges - spr = self._p_radicals sub = object.__new__(self.__class__) - sub._charges = {n: sc[n] for n in atoms} - sub._radicals = {n: sr[n] for n in atoms} - sub._p_charges = {n: spc[n] for n in atoms} - sub._p_radicals = {n: 
spr[n] for n in atoms} - - sub._atoms = ca = {} - for n in atoms: - ca[n] = atom = sa[n].copy() - atom._attach_graph(sub, n) + sub._atoms = {n: sa[n].copy() for n in atoms} sub._bonds = cb = {} for n in atoms: @@ -136,19 +105,5 @@ def get_mapping(self, other: 'CGRContainer', /, *, automorphism_filter: bool = T def __iter__(self): return iter(self._atoms) - def __getstate__(self): - return {'atoms': self._atoms, 'bonds': self._bonds, 'charges': self._charges, 'radicals': self._radicals, - 'p_charges': self._p_charges, 'p_radicals': self._p_radicals} - - def __setstate__(self, state): - self._atoms = state['atoms'] - for n, a in state['atoms'].items(): - a._attach_graph(self, n) - self._charges = state['charges'] - self._radicals = state['radicals'] - self._bonds = state['bonds'] - self._p_charges = state['p_charges'] - self._p_radicals = state['p_radicals'] - __all__ = ['CGRContainer'] diff --git a/chython/containers/graph.py b/chython/containers/graph.py index 4d9ad441..51fb0412 100644 --- a/chython/containers/graph.py +++ b/chython/containers/graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -19,8 +19,6 @@ from abc import ABC, abstractmethod from functools import cached_property from typing import Dict, Generic, Iterator, Optional, Tuple, TypeVar -from ..algorithms.morgan import Morgan -from ..algorithms.rings import Rings from ..exceptions import AtomNotFound, MappingError, BondNotFound @@ -28,27 +26,16 @@ Bond = TypeVar('Bond') -class Graph(Generic[Atom, Bond], Morgan, Rings, ABC): - __slots__ = ('_atoms', '_bonds', '_charges', '_radicals', '_atoms_stereo', '_cis_trans_stereo', '_allenes_stereo', - '__dict__', '__weakref__') +class Graph(Generic[Atom, Bond], ABC): + __slots__ = ('_atoms', '_bonds', '__dict__') __class_cache__ = {} _atoms: Dict[int, Atom] _bonds: Dict[int, Dict[int, Bond]] - _charges: Dict[int, int] - _radicals: Dict[int, bool] - _atoms_stereo: Dict[int, bool] - _allenes_stereo: Dict[int, bool] - _cis_trans_stereo: Dict[Tuple[int, int], bool] def __init__(self): self._atoms = {} self._bonds = {} - self._charges = {} - self._radicals = {} - self._atoms_stereo = {} - self._allenes_stereo = {} - self._cis_trans_stereo = {} def atom(self, n: int) -> Atom: return self._atoms[n] @@ -99,7 +86,7 @@ def bonds_count(self) -> int: return sum(len(x) for x in self._bonds.values()) // 2 @abstractmethod - def add_atom(self, atom: Atom, n: Optional[int] = None, *, charge: int = 0, is_radical: bool = False) -> int: + def add_atom(self, atom: Atom, n: Optional[int] = None) -> int: """ new atom addition """ @@ -109,19 +96,10 @@ def add_atom(self, atom: Atom, n: Optional[int] = None, *, charge: int = 0, is_r raise TypeError('mapping should be integer') elif n in self._atoms: raise MappingError('atom with same number exists') - elif not isinstance(is_radical, bool): - raise TypeError('bool expected') - elif not isinstance(charge, int): - raise TypeError('formal charge should be int in range [-4, 4]') - elif charge > 4 or charge < -4: - raise ValueError('formal charge should be in range 
[-4, 4]') - - atom._attach_graph(self, n) + self._atoms[n] = atom - self._charges[n] = charge - self._radicals[n] = is_radical self._bonds[n] = {} - self.__dict__.clear() + self.flush_cache() return n @abstractmethod @@ -137,118 +115,59 @@ def add_bond(self, n: int, m: int, bond: Bond): raise MappingError('atoms already bonded') self._bonds[n][m] = self._bonds[m][n] = bond - self.__dict__.clear() + self.flush_cache() - @abstractmethod def copy(self): """ copy of graph """ copy = object.__new__(self.__class__) - copy._charges = self._charges.copy() - copy._radicals = self._radicals.copy() - - copy._atoms = ca = {} - for n, atom in self._atoms.items(): - atom = atom.copy() - ca[n] = atom - atom._attach_graph(copy, n) + copy._atoms = {n: atom.copy(full=True) for n, atom in self.atoms()} + copy._bonds = cb = {} + for n, m_bond in self._bonds.items(): + cb[n] = cbn = {} + for m, bond in m_bond.items(): + if m in cb: # bond partially exists. need back-connection. + cbn[m] = cb[m][n] + else: + cbn[m] = bond.copy(full=True) return copy - @abstractmethod - def remap(self, mapping: Dict[int, int], *, copy=False): + def remap(self, mapping: Dict[int, int]): """ Change atom numbers :param mapping: mapping of old numbers to the new - :param copy: keep original graph """ if len(mapping) != len(set(mapping.values())) or \ not (self._atoms.keys() - mapping.keys()).isdisjoint(mapping.values()): raise ValueError('mapping overlap') mg = mapping.get - sc = self._charges - sr = self._radicals - - if copy: - h = self.__class__() - ha = h._atoms - hc = h._charges - hr = h._radicals - has = h._atoms_stereo - hal = h._allenes_stereo - hcs = h._cis_trans_stereo - - for n, atom in self._atoms.items(): - m = mg(n, n) - atom = atom.copy() - ha[m] = atom - atom._attach_graph(h, m) - else: - ha = {} - hc = {} - hr = {} - has = {} - hal = {} - hcs = {} - - for n, atom in self._atoms.items(): - m = mg(n, n) - ha[m] = atom - atom._change_map(m) # change mapping number - - for n in self._atoms: - m 
= mg(n, n) - hc[m] = sc[n] - hr[m] = sr[n] - - for n, stereo in self._atoms_stereo.items(): - has[mg(n, n)] = stereo - for n, stereo in self._allenes_stereo.items(): - hal[mg(n, n)] = stereo - for (n, m), stereo in self._cis_trans_stereo.items(): - hcs[(mg(n, n), mg(m, m))] = stereo - - if copy: - return h # noqa - - self._atoms = ha - self._charges = hc - self._radicals = hr - self._atoms_stereo = has - self._allenes_stereo = hal - self._cis_trans_stereo = hcs + self._atoms = {mg(n, n): atom for n, atom in self.atoms()} + self._bonds = {mg(n, n): {mg(m, m): bond for m, bond in m_bond.items()} for n, m_bond in self._bonds.items()} self.flush_cache() - return self - @abstractmethod def union(self, other: 'Graph', *, remap: bool = False, copy: bool = True): """ Merge Graphs into one. :param remap: if atoms has collisions then remap other graph atoms else raise exception. - :param copy: keep original structure and return new object + :param copy: keep original structure and return a new object """ if self._atoms.keys() & other._atoms.keys(): - if remap: - other = other.remap({n: i for i, n in enumerate(other, start=max(self._atoms) + 1)}, copy=True) - else: + if not remap: raise MappingError('mapping of graphs is not disjoint') - + other = other.copy() + other.remap({n: i for i, n in enumerate(other, start=max(self._atoms) + 1)}) + else: + other = other.copy() # make a copy u = self.copy() if copy else self - u._charges.update(other._charges) - u._radicals.update(other._radicals) - - ua = u._atoms - for n, atom in other._atoms.items(): - ua[n] = atom = atom.copy() - atom._attach_graph(u, n) - - u._atoms_stereo.update(other._atoms_stereo) - u._allenes_stereo.update(other._allenes_stereo) - u._cis_trans_stereo.update(other._cis_trans_stereo) - return u, other + u._atoms.update(other._atoms) + u._bonds.update(other._bonds) + if not copy: + self.flush_cache() + return u def flush_cache(self): self.__dict__.clear() @@ -277,24 +196,5 @@ def __iter__(self) -> Iterator[int]: 
def __bool__(self): return bool(self._atoms) - def __getstate__(self): - state = {'atoms': self._atoms, 'bonds': self._bonds, 'charges': self._charges, - 'radicals': self._radicals} - from chython import pickle_cache - - if pickle_cache: - state['cache'] = {k: v for k, v in self.__dict__.items() if k != '__cached_method___hash__'} - return state - - def __setstate__(self, state): - self._atoms = state['atoms'] - for n, a in state['atoms'].items(): - a._attach_graph(self, n) - self._charges = state['charges'] - self._radicals = state['radicals'] - self._bonds = state['bonds'] - if 'cache' in state: - self.__dict__.update(state['cache']) - __all__ = ['Graph'] diff --git a/chython/containers/molecule.py b/chython/containers/molecule.py index 56d6987b..b1201b49 100644 --- a/chython/containers/molecule.py +++ b/chython/containers/molecule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -20,62 +20,60 @@ from collections import Counter, defaultdict from functools import cached_property from numpy import uint, zeros -from typing import Dict, Iterable, List, Optional, Tuple, Union -from weakref import ref +from typing import Dict, Iterable, List, Tuple, Union from zlib import compress, decompress -from .bonds import Bond, DynamicBond, QueryBond +from .bonds import Bond, DynamicBond from .cgr import CGRContainer from .graph import Graph -from .query import QueryContainer from ..algorithms.aromatics import Aromatize from ..algorithms.calculate2d import Calculate2DMolecule from ..algorithms.depict import DepictMolecule from ..algorithms.isomorphism import MoleculeIsomorphism from ..algorithms.fingerprints import Fingerprints from ..algorithms.mcs import MCS +from ..algorithms.morgan import Morgan +from ..algorithms.rings import Rings from ..algorithms.smiles import MoleculeSmiles from 
..algorithms.standardize import StandardizeMolecule from ..algorithms.stereo import MoleculeStereo from ..algorithms.tautomers import Tautomers from ..algorithms.x3dom import X3domMolecule -from ..exceptions import MappingError, ValenceError -from ..periodictable import DynamicElement, Element, QueryElement, H +from ..exceptions import ValenceError +from ..periodictable import DynamicElement, Element, H as _H -class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], MoleculeIsomorphism, Aromatize, StandardizeMolecule, - MoleculeSmiles, DepictMolecule, Calculate2DMolecule, Fingerprints, Tautomers, MCS, - X3domMolecule): - __slots__ = ('_plane', '_conformers', '_hydrogens', '_parsed_mapping', '_backup', '__meta', '__name') +# atomic number constants +H = 1 +C = 6 - _conformers: List[Dict[int, Tuple[float, float, float]]] - _hydrogens: Dict[int, Optional[int]] - _parsed_mapping: Dict[int, int] - _plane: Dict[int, Tuple[float, float]] + +class MoleculeContainer(MoleculeStereo, Graph[Element, Bond], Morgan, Rings, MoleculeIsomorphism, + Aromatize, StandardizeMolecule, MoleculeSmiles, DepictMolecule, Calculate2DMolecule, + Fingerprints, Tautomers, MCS, X3domMolecule): + __slots__ = ('_meta', '_name', '_conformers', '_changed', '_backup') def __init__(self): super().__init__() - self._conformers = [] - self._hydrogens = {} - self._parsed_mapping = {} - self._plane = {} - self.__meta = None - self.__name = None + self._meta = None + self._name = None + self._changed = None + self._backup = None @property def meta(self) -> Dict: - if self.__meta is None: - self.__meta = {} # lazy - return self.__meta + if self._meta is None: + self._meta = {} # lazy + return self._meta @property def name(self) -> str: - return self.__name or '' + return self._name or '' @name.setter def name(self, name): if not isinstance(name, str): - raise TypeError('name should be string up to 80 symbols') - self.__name = name + raise TypeError('name should be a string preferably up to 80 symbols') 
+ self._name = name def environment(self, atom: int, include_bond: bool = True, include_atom: bool = True) -> \ Tuple[Union[Tuple[int, Bond, Element], @@ -101,70 +99,6 @@ def environment(self, atom: int, include_bond: bool = True, include_atom: bool = return tuple(self._bonds[atom].items()) return tuple(self._bonds[atom]) - @cached_args_method - def neighbors(self, n: int) -> int: - """number of neighbors atoms excluding any-bonded""" - return sum(b.order != 8 for b in self._bonds[n].values()) - - @cached_args_method - def hybridization(self, n: int) -> int: - """ - Atom hybridization. - - 1 - if atom has zero or only single bonded neighbors, 2 - if has only one double bonded neighbor and any amount - of single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or - two and more double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. - """ - hybridization = 1 - for bond in self._bonds[n].values(): - order = bond.order - if order == 4: - return 4 - elif order == 3: - if hybridization != 3: - hybridization = 3 - elif order == 2: - if hybridization == 1: - hybridization = 2 - elif hybridization == 2: - hybridization = 3 - return hybridization - - @cached_args_method - def heteroatoms(self, n: int) -> int: - """ - Number of neighbored heteroatoms (not carbon or hydrogen) except any-bond connected. - """ - atoms = self._atoms - return sum(atoms[m].atomic_number not in (1, 6) for m, b in self._bonds[n].items() if b.order != 8) - - def implicit_hydrogens(self, n: int) -> Optional[int]: - """ - Number of implicit hydrogen atoms connected to atom. - - Returns None if count are ambiguous. - """ - return self._hydrogens[n] - - @cached_args_method - def explicit_hydrogens(self, n: int) -> int: - """ - Number of explicit hydrogen atoms connected to atom. - - Take into account any type of bonds with hydrogen atoms. 
- """ - atoms = self._atoms - return sum(atoms[m].atomic_number == 1 for m in self._bonds[n]) - - @cached_args_method - def total_hydrogens(self, n: int) -> int: - """ - Number of hydrogen atoms connected to atom. - - Take into account any type of bonds with hydrogen atoms. - """ - return self._hydrogens[n] + self.explicit_hydrogens(n) - @cached_args_method def adjacency_matrix(self, set_bonds=False, /): """ @@ -191,24 +125,25 @@ def molecular_charge(self) -> int: """ Total charge of molecule """ - return sum(self._charges.values()) + return sum(a.charge for _, a in self.atoms()) @cached_property def is_radical(self) -> bool: """ True if at least one atom is radical """ - return any(self._radicals.values()) + return any(a.is_radical for _, a in self.atoms()) @cached_property def molecular_mass(self) -> float: - return sum(x.atomic_mass for x in self._atoms.values()) + sum(self._hydrogens.values()) * H().atomic_mass + h = _H().atomic_mass + return sum(a.atomic_mass + a.implicit_hydrogens * h for _, a in self.atoms()) @cached_property def brutto(self) -> Dict[str, int]: """Counted atoms dict""" - c = Counter(x.atomic_symbol for x in self._atoms.values()) - c['H'] += sum(self._hydrogens.values()) + c = Counter(a.atomic_symbol for _, a in self.atoms()) + c['H'] += sum(a.implicit_hydrogens for _, a in self.atoms()) return dict(c) @cached_property @@ -220,8 +155,7 @@ def aromatic_rings(self) -> Tuple[Tuple[int, ...], ...]: return tuple(ring for ring in self.sssr if bonds[ring[0]][ring[-1]] == 4 and all(bonds[n][m] == 4 for n, m in zip(ring, ring[1:]))) - def add_atom(self, atom: Union[Element, int, str], *args, charge=0, is_radical=False, - xy: Tuple[float, float] = (0., 0.), _skip_hydrogen_calculation=False, **kwargs): + def add_atom(self, atom: Union[Element, int, str], *args, _skip_calculation=False, **kwargs): """ Add new atom. 
""" @@ -232,27 +166,17 @@ def add_atom(self, atom: Union[Element, int, str], *args, charge=0, is_radical=F atom = Element.from_atomic_number(atom)() else: raise TypeError('Element object expected') - if not isinstance(xy, tuple) or len(xy) != 2 or not isinstance(xy[0], float) or not isinstance(xy[1], float): - raise TypeError('XY should be tuple with 2 float') - - n = super().add_atom(atom, *args, charge=charge, is_radical=is_radical, **kwargs) - self._plane[n] = xy - self._conformers.clear() # clean conformers. need full recalculation for new system - - if _skip_hydrogen_calculation: - self._hydrogens[n] = None - elif atom.atomic_number != 1: - try: - rules = atom.valence_rules(charge, is_radical, 0) - except ValenceError: - self._hydrogens[n] = None - else: - self._hydrogens[n] = rules[0][2] # first rule without neighbors + + n = super().add_atom(atom, *args, **kwargs) + if self._changed is None: + self._changed = {n} else: - self._hydrogens[n] = 0 + self._changed.add(n) + if not _skip_calculation and self._backup is None: + self.fix_structure() return n - def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_hydrogen_calculation=False): + def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_calculation=False): """ Connect atoms with bonds. @@ -263,21 +187,19 @@ def add_bond(self, n, m, bond: Union[Bond, int], *, _skip_hydrogen_calculation=F if not isinstance(bond, Bond): bond = Bond(bond) - bond._attach_graph(self, n, m) super().add_bond(n, m, bond) - self._conformers.clear() # clean conformers. 
need full recalculation for new system - - if _skip_hydrogen_calculation: # skip stereo fixing too - return - - self._calc_implicit(n) - self._calc_implicit(m) - - if self._atoms[n].atomic_number != 1 and self._atoms[m].atomic_number != 1: # not hydrogen - # fix stereo if formed not to hydrogen bond + if bond == 8: + return # any bond doesn't change anything + if self._changed is None: + self._changed = {n, m} + else: + self._changed.add(n) + self._changed.add(m) + if not _skip_calculation and self._backup is None: + self.fix_structure() self.fix_stereo() - def delete_atom(self, n: int, *, _skip_hydrogen_calculation=False): + def delete_atom(self, n: int, *, _skip_calculation=False): """ Remove atom. @@ -285,30 +207,20 @@ def delete_atom(self, n: int, *, _skip_hydrogen_calculation=False): Implicit hydrogens marks will not be set if atoms in aromatic rings. Call `kekule()` and `thiele()` in sequence to fix marks. """ - ngb = self._bonds.pop(n) - fix = self._atoms.pop(n).atomic_number != 1 and ngb and not _skip_hydrogen_calculation - - del self._charges[n] - del self._radicals[n] - del self._hydrogens[n] - del self._plane[n] - - for m in ngb: + del self._atoms[n] + for m, bond in self._bonds.pop(n).items(): del self._bonds[m][n] - if not _skip_hydrogen_calculation: - self._calc_implicit(m) - - self._conformers.clear() # clean conformers. need full recalculation for new system - try: - del self._parsed_mapping[n] - except KeyError: - pass - - if fix: # hydrogen atom not used for stereo coding + if bond == 8: + continue + if self._changed is None: + self._changed = {m} + else: + self._changed.add(m) + if not _skip_calculation and self._backup is None: + self.fix_structure() self.fix_stereo() - self.flush_cache() - def delete_bond(self, n: int, m: int, *, _skip_hydrogen_calculation=False): + def delete_bond(self, n: int, m: int, *, _skip_calculation=False): """ Disconnect atoms. 
@@ -317,136 +229,39 @@ def delete_bond(self, n: int, m: int, *, _skip_hydrogen_calculation=False): Call `kekule()` and `thiele()` in sequence to fix marks. """ del self._bonds[n][m] - del self._bonds[m][n] - self._conformers.clear() # clean conformers. need full recalculation for new system - - if not _skip_hydrogen_calculation: - self._calc_implicit(n) - self._calc_implicit(m) - - if self._atoms[n].atomic_number != 1 and self._atoms[m].atomic_number != 1 and not _skip_hydrogen_calculation: + if self._bonds[m].pop(n) != 8: + if self._changed is None: + self._changed = {n, m} + else: + self._changed.add(n) + self._changed.add(m) + if not _skip_calculation and self._backup is None: + self.fix_structure() self.fix_stereo() - self.flush_cache() - - def remap(self, mapping: Dict[int, int], *, copy: bool = False) -> 'MoleculeContainer': - atoms = self._atoms # keep original atoms dict - h = super().remap(mapping, copy=copy) - - mg = mapping.get - sp = self._plane - shg = self._hydrogens - - if copy: - h._MoleculeContainer__name = self.__name - if self.__meta is not None: - h._MoleculeContainer__meta = self.__meta.copy() - hb = h._bonds - hp = h._plane - hhg = h._hydrogens - hcf = h._conformers - hm = h._parsed_mapping - - # deep copy of bonds - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond = bond.copy() - bond._attach_graph(h, n, m) - else: - hb = {} - hp = {} - hhg = {} - hcf = [] - hm = {} - - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. 
- hbn[m] = hb[m][n] - else: - hbn[m] = bond - bond._change_map(n, m) - - for n in atoms: - m = mg(n, n) - hp[m] = sp[n] - hhg[m] = shg[n] - - hcf.extend({mg(n, n): x for n, x in c.items()} for c in self._conformers) - for n, m in self._parsed_mapping.items(): - hm[mg(n, n)] = m - - if copy: - return h - - self._bonds = hb - self._plane = hp - self._hydrogens = hhg - self._conformers = hcf - self._parsed_mapping = hm - return self - def copy(self) -> 'MoleculeContainer': + def copy(self, *, keep_sssr=False, keep_components=False) -> 'MoleculeContainer': copy = super().copy() - - copy._bonds = cb = {} - for n, m_bond in self._bonds.items(): - cb[n] = cbn = {} - for m, bond in m_bond.items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] - else: - cbn[m] = bond = bond.copy() - bond._attach_graph(copy, n, m) - - copy._MoleculeContainer__name = self.__name - if self.__meta is None: - copy._MoleculeContainer__meta = None + copy._name = self._name + if self._meta is None: + copy._meta = None else: - copy._MoleculeContainer__meta = self.__meta.copy() - copy._plane = self._plane.copy() - copy._hydrogens = self._hydrogens.copy() - copy._parsed_mapping = self._parsed_mapping.copy() - copy._conformers = [c.copy() for c in self._conformers] - copy._atoms_stereo = self._atoms_stereo.copy() - copy._allenes_stereo = self._allenes_stereo.copy() - copy._cis_trans_stereo = self._cis_trans_stereo.copy() + copy._meta = self._meta.copy() + + if keep_sssr: + for k, v in self.__dict__.items(): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', 'not_special_connectivity', 'rings_count'): + copy.__dict__[k] = v + if keep_components: + if 'connected_components' in self.__dict__: + copy.__dict__['connected_components'] = self.connected_components return copy def union(self, other: 'MoleculeContainer', *, remap: bool = False, copy: bool = True) -> 'MoleculeContainer': if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer 
expected') - u, o = super().union(other, remap=remap, copy=copy) + return super().union(other, remap=remap, copy=copy) - ub = u._bonds - for n, m_bond in o._bonds.items(): - ub[n] = ubn = {} - for m, bond in m_bond.items(): - if m in ub: # bond partially exists. need back-connection. - ubn[m] = ub[m][n] - else: - ubn[m] = bond = bond.copy() - bond._attach_graph(u, n, m) - - u._MoleculeContainer__name = u._MoleculeContainer__meta = None - u._conformers.clear() - u._plane.update(o._plane) - u._hydrogens.update(o._hydrogens) - u._parsed_mapping.update(o._parsed_mapping) - return u - - def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalculate_hydrogens=True, - skip_neighbors_marks=False, skip_hybridizations_marks=False, skip_hydrogens_marks=False, - skip_rings_sizes_marks=False, skip_heteroatoms_marks=False) -> \ - Union['MoleculeContainer', 'QueryContainer']: + def substructure(self, atoms: Iterable[int], *, recalculate_hydrogens=True) -> 'MoleculeContainer': """ Create substructure containing atoms from atoms list. @@ -455,111 +270,26 @@ def substructure(self, atoms: Iterable[int], *, as_query: bool = False, recalcul Call `kekule()` and `thiele()` in sequence to fix marks. 
:param atoms: list of atoms numbers of substructure - :param as_query: return Query object based on graph substructure :param recalculate_hydrogens: calculate implicit H count in substructure - :param skip_neighbors_marks: Don't set neighbors count marks on substructured queries - :param skip_hybridizations_marks: Don't set hybridizations marks on substructured queries - :param skip_hydrogens_marks: Don't set hydrogens count marks on substructured queries - :param skip_rings_sizes_marks: Don't set rings_sizes marks on substructured queries - :param skip_heteroatoms_marks: Don't set heteroatoms count marks """ if not atoms: raise ValueError('empty atoms list not allowed') if set(atoms) - self._atoms.keys(): raise ValueError('invalid atom numbers') - atoms = tuple(n for n in self._atoms if n in atoms) # save original order - if as_query: - atom_type = QueryElement - bond_type = QueryBond - sub = object.__new__(QueryContainer) - else: - atom_type = Element - bond_type = Bond - sub = object.__new__(self.__class__) - sub._MoleculeContainer__name = sub._MoleculeContainer__meta = None - - sa = self._atoms - sb = self._bonds - sc = self._charges - sr = self._radicals - - sub._charges = {n: sc[n] for n in atoms} - sub._radicals = {n: sr[n] for n in atoms} - - sub._atoms = ca = {} - for n in atoms: - ca[n] = atom = atom_type.from_atom(sa[n]) - atom._attach_graph(sub, n) - - sub._bonds = cb = {} + atoms = tuple(n for n in self if n in atoms) # save original order + sub = object.__new__(self.__class__) + sub._name = sub._meta = sub._changed = None + sub._atoms = {n: self._atoms[n].copy(hydrogens=not recalculate_hydrogens, stereo=True) for n in atoms} + sub._bonds = sb = {} for n in atoms: - cb[n] = cbn = {} - for m, bond in sb[n].items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] + sb[n] = sbn = {} + for m, bond in self._bonds[n].items(): + if m in sb: # bond partially exists. need back-connection. 
+ sbn[m] = sb[m][n] elif m in atoms: - cbn[m] = bond = bond_type.from_bond(bond) - if not as_query: - bond._attach_graph(sub, n, m) - - if as_query: - lost = {n for n, a in sa.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure - not_skin = {n for n in atoms if lost.isdisjoint(sb[n])} - sub._atoms_stereo = {n: s for n, s in self._atoms_stereo.items() if n in not_skin} - sub._allenes_stereo = {n: s for n, s in self._allenes_stereo.items() - if not_skin.issuperset(self._stereo_allenes_paths[n]) and - not_skin.issuperset(x for x in self._stereo_allenes[n] if x)} - sub._cis_trans_stereo = {nm: s for nm, s in self._cis_trans_stereo.items() - if not_skin.issuperset(self._stereo_cis_trans_paths[nm]) and - not_skin.issuperset(x for x in self._stereo_cis_trans[nm] if x)} - - sub._masked = {n: False for n in atoms} - if skip_heteroatoms_marks: - sub._heteroatoms = {n: () for n in atoms} - else: - sha = self.heteroatoms - sub._heteroatoms = {n: (sha(n),) for n in atoms} - - if skip_hybridizations_marks: - sub._hybridizations = {n: () for n in atoms} - else: - sh = self.hybridization - sub._hybridizations = {n: (sh(n),) for n in atoms} - if skip_neighbors_marks: - sub._neighbors = {n: () for n in atoms} - else: - sn = self.neighbors - sub._neighbors = {n: (sn(n),) for n in atoms} - if skip_hydrogens_marks: - sub._hydrogens = {n: () for n in atoms} - else: - shg = self._hydrogens - sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms} - if skip_rings_sizes_marks: - sub._rings_sizes = {n: () for n in atoms} - else: - rs = self.atoms_rings_sizes - sub._rings_sizes = {n: rs.get(n, ()) for n in atoms} - else: - sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers] - - if recalculate_hydrogens: - sub._hydrogens = {} - for n in atoms: - sub._calc_implicit(n) - else: - hg = self._hydrogens - sub._hydrogens = {n: hg[n] for n in atoms} - - sp = self._plane - sub._plane = {n: sp[n] for n in atoms} - sub._parsed_mapping = {n: m for 
n, m in self._parsed_mapping.items() if n in atoms} - - # fix_stereo will repair data - sub._atoms_stereo = self._atoms_stereo.copy() - sub._allenes_stereo = self._allenes_stereo.copy() - sub._cis_trans_stereo = self._cis_trans_stereo.copy() - sub.fix_stereo() + sbn[m] = bond.copy(stereo=True) + sub.fix_structure(recalculate_hydrogens=recalculate_hydrogens) + sub.fix_stereo() return sub def augmented_substructure(self, atoms: Iterable[int], deep: int = 1, **kwargs) -> 'MoleculeContainer': @@ -594,77 +324,46 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': """ if not isinstance(other, MoleculeContainer): raise TypeError('MoleculeContainer expected') - sa = self._atoms - sc = self._charges - sr = self._radicals - sb = self._bonds - bonds = [] adj = defaultdict(lambda: defaultdict(lambda: [None, None])) - - oa = other._atoms - oc = other._charges - or_ = other._radicals - ob = other._bonds - - common = sa.keys() & oa.keys() + common = self._atoms.keys() & other._atoms.keys() h = CGRContainer() ha = h._atoms hb = h._bonds - hc = h._charges - hpc = h._p_charges - hr = h._radicals - hpr = h._p_radicals - - for n in sa.keys() - common: # cleavage atoms - hc[n] = hpc[n] = sc[n] - hr[n] = hpr[n] = sr[n] - hb[n] = {} - ha[n] = a = DynamicElement.from_atom(sa[n]) - a._attach_graph(h, n) - for m, bond in sb[n].items(): + for n in self._atoms.keys() - common: # cleavage atoms + ha[n] = DynamicElement.from_atom(self._atoms[n]) + hb[n] = {} + for m, bond in self._bonds[n].items(): if m not in ha: if m in common: # bond to common atoms is broken bond bond = DynamicBond(bond.order, None) else: - bond = DynamicBond(bond.order, bond.order) + bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) - for n in oa.keys() - common: # coupling atoms - hc[n] = hpc[n] = oc[n] - hr[n] = hpr[n] = or_[n] + for n in other._atoms.keys() - common: # coupling atoms + ha[n] = DynamicElement.from_atom(other._atoms[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(oa[n]) - 
a._attach_graph(h, n) - for m, bond in ob[n].items(): + for m, bond in other._bonds[n].items(): if m not in ha: if m in common: # bond to common atoms is formed bond bond = DynamicBond(None, bond.order) else: - bond = DynamicBond(bond.order, bond.order) + bond = DynamicBond.from_bond(bond) bonds.append((n, m, bond)) for n in common: an = adj[n] - for m, bond in sb[n].items(): + for m, bond in self._bonds[n].items(): if m in common: an[m][0] = bond.order - for m, bond in ob[n].items(): + for m, bond in other._bonds[n].items(): if m in common: an[m][1] = bond.order for n in common: - san = sa[n] - if san.atomic_number != oa[n].atomic_number or san.isotope != oa[n].isotope: - raise MappingError(f'atoms with number {n} not equal') - - hc[n] = sc[n] - hpc[n] = oc[n] - hr[n] = sr[n] - hpr[n] = or_[n] + ha[n] = DynamicElement.from_atoms(self._atoms[n], other._atoms[n]) hb[n] = {} - ha[n] = a = DynamicElement.from_atom(san) - a._attach_graph(h, n) for m, (o1, o2) in adj[n].items(): if m not in ha: @@ -674,22 +373,6 @@ def compose(self, other: 'MoleculeContainer') -> 'CGRContainer': hb[n][m] = hb[m][n] = bond return h - def get_fast_mapping(self, other: 'MoleculeContainer') -> Optional[Dict[int, int]]: - """ - Get self to other fast (suboptimal) structure mapping. - Only one possible atoms mapping returned. - Effective only for big molecules. - """ - if isinstance(other, MoleculeContainer): - if len(self) != len(other): - return - so = self.smiles_atoms_order - oo = other.smiles_atoms_order - if self != other: - return - return dict(zip(so, oo)) - raise TypeError('MoleculeContainer expected') - def pack(self, *, compressed=True, check=True, version=2, order: List[int] = None) -> bytes: """ Pack into compressed bytes. @@ -728,37 +411,12 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non 7 bit - zero padding. in future can be used for extra bond-level stereo, like atropoisomers. 
1 bit - sign - Format V3 specification:: - - Big endian bytes order - 8 bit - 0x03 (format specification version) - Atom block 3 bytes (repeated): - 1 bit - atom entrance flag (always 1) - 7 bit - atomic number (<=118) - 3 bit - hydrogens (0-7). Note: 7 == None - 4 bit - charge (charge + 4. possible range -4 - 4) - 1 bit - radical state - 1 bit padding - 3 bit tetrahedron/allene sign - (000 - not stereo or unknown, 001 - pure-unknown-enantiomer, 010 or 011 - has stereo) - 4 bit - number of following bonds and CT blocks (0-15) - - Bond block 2 bytes (repeated 0-15 times) - 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) - 4 bit - bond order: 0000 - single, 0001 - double, 0010 - triple, 0011 - aromatic, 0111 - special - - Cis-Trans 2 bytes - 12 bit - negative shift from current atom to connected (e.g. 0x001 = -1 - connected to previous atom) - 4 bit - CT sign: 1000 or 1001 - to avoid overlap with bond - - V2 format is faster than V3. V3 format doesn't include isotopes, atom numbers and XY coordinates. - :param compressed: return zlib-compressed pack. :param check: check molecule for format restrictions. - :param version: format version + :param version: format version. Only V2 is supported. 
:param order: atom order in V3 """ - from ._pack import pack + from ._pack_v2 import pack as pack_v2 if check: bonds = self._bonds @@ -770,15 +428,16 @@ def pack(self, *, compressed=True, check=True, version=2, order: List[int] = Non raise ValueError('To many neighbors not supported') if version == 2: - data = pack(self) - elif version == 3: - data = self._cpack(order, check) + data = pack_v2(self) else: raise ValueError('invalid specification version') if compressed: return compress(data, 9) return data + def pach(self, *, compressed=True, check=True, version=2, order: List[int] = None) -> bytes: + return self.pack(compressed=compressed, check=check, version=version, order=order) + @classmethod def pack_len(cls, data: bytes, /, *, compressed=True) -> int: """ @@ -791,167 +450,45 @@ def pack_len(cls, data: bytes, /, *, compressed=True) -> int: return int.from_bytes(data[1:3], 'big') >> 4 @classmethod - def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, + def unpack(cls, data: Union[bytes, memoryview], /, *, compressed=True, skip_labels_calculation=False, _return_pack_length=False) -> 'MoleculeContainer': """ Unpack from compressed bytes. :param compressed: decompress data before processing. 
""" - from ._unpack import unpack - from ._cpack import unpack as cpack + from ._unpack_v0v2 import unpack as unpack_v0v2 if compressed: data = decompress(data) if data[0] in (0, 2): - (mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, bonds, - atoms_stereo, allenes_stereo, cis_trans_stereo, pack_length, bonds_flat) = unpack(data) - elif data[0] == 3: - (mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, bonds, - atoms_stereo, allenes_stereo, cis_trans_stereo, pack_length, bonds_flat) = cpack(data) + mol, cis_trans, pack_length = unpack_v0v2(data) + for n, m, s in cis_trans: + mol.bond(*mol._stereo_cis_trans_centers[n])._stereo = s else: raise ValueError('invalid pack header') - mol = object.__new__(cls) - mol._bonds = bonds - mol._plane = plane - mol._charges = charges - mol._radicals = radicals - mol._hydrogens = hydrogens - mol._atoms_stereo = atoms_stereo - mol._allenes_stereo = allenes_stereo - mol._cis_trans_stereo = cis_trans_stereo - - mol._conformers = [] - mol._parsed_mapping = {} - mol._MoleculeContainer__meta = None - mol._MoleculeContainer__name = None - mol._atoms = atoms = {} - - for n, a, i in zip(mapping, atom_numbers, isotopes): - atoms[n] = a = object.__new__(Element.from_atomic_number(a)) - a._Core__isotope = i - a._graph = ref(mol) - a._n = n - for b in bonds_flat: - b._Bond__graph = ref(mol) + if not skip_labels_calculation: + mol.calc_labels() if _return_pack_length: return mol, pack_length return mol - def _cpack(self, order=None, check=True): - if order is None: - order = list(self._atoms) - elif check: - if not isinstance(order, (list, tuple)): - raise TypeError('invalid atoms order') - elif len(so := set(order)) != len(order) or not so.issubset(self._atoms): - raise ValueError('invalid atoms order') + @classmethod + def unpach(cls, data: Union[bytes, memoryview], /, *, compressed=True) -> 'MoleculeContainer': + """ + Unpack from compressed bytes. 
+ """ + return cls.unpack(data, compressed=compressed) - atoms = self._atoms - bonds = self._bonds - charges = self._charges - radicals = self._radicals - hydrogens = self._hydrogens - atoms_stereo = self._atoms_stereo - allenes_stereo = self._allenes_stereo - allenes_terminals = self._stereo_allenes_terminals - - cumulenes = {} - ct_map = {} - for n, m in self._cis_trans_stereo: - ct_map[n] = m - ct_map[m] = n - cumulenes[n] = [x for x, b in bonds[n].items() if b.order in (1, 4)] - cumulenes[m] = [x for x, b in bonds[m].items() if b.order in (1, 4)] - - for c in self._allenes_stereo: - n, m = allenes_terminals[c] - cumulenes[n] = [x for x, b in bonds[n].items() if b.order in (1, 4)] - cumulenes[m] = [x for x, b in bonds[m].items() if b.order in (1, 4)] - - seen = {} - data = [b'\x03'] - for i, n in enumerate(order): - seen[n] = i - env = bonds[n] - - data.append((0x80 | atoms[n].atomic_number).to_bytes(1, 'big')) - - # 3 bit - hydrogens (0-6, None) | 4 bit - charge | 1 bit - radical - hcr = (charges[n] + 4) << 1 | radicals[n] - if (h := hydrogens[n]) is None: - hcr |= 0b11100000 - else: - hcr |= h << 5 - data.append(hcr.to_bytes(1, 'big')) - - if n in atoms_stereo: - if self._translate_tetrahedron_sign(n, [x for x in order if x in env]): - s = 0b0011_0000 - else: - s = 0b0010_0000 - elif n in allenes_stereo: - t1, t2 = allenes_terminals[n] - nn = None - for x in order: - if nn is None: - if x in cumulenes[t1]: - nn = x - flag = True - elif x in cumulenes[t2]: - flag = False - nn = x - elif flag: # noqa - if x in cumulenes[t2]: - nm = x - break - elif x in cumulenes[t1]: - nm = x - break - if self._translate_allene_sign(n, nn, nm): # noqa - s = 0b0011_0000 - else: - s = 0b0010_0000 - else: - s = 0 - - tmp = [] - for m in order[:i]: - if (b := env.get(m)) is not None: - tmp.append(((i - seen[m]) << 4 | b.order - 1).to_bytes(2, 'big')) - if n in ct_map and (m := ct_map[n]) in seen: # only right atom codes stereo sign - nm = None - for x in order: - if nm is None: - 
if x in cumulenes[n]: - nm = x - flag = True - elif x in cumulenes[m]: - nm = x - flag = False - elif flag: # noqa - if x in cumulenes[m]: - nn = x - break - elif x in cumulenes[n]: - nn = x - break - if self._translate_cis_trans_sign(m, n, nm, nn): # noqa - cs = 0b1001 - else: - cs = 0b1000 - tmp.append(((i - seen[m]) << 4 | cs).to_bytes(2, 'big')) - - data.append((s | len(tmp)).to_bytes(1, 'big')) - data.extend(tmp) - return b''.join(data) + def __bytes__(self): + return self.pack() def _augmented_substructure(self, atoms: Iterable[int], deep: int): atoms = set(atoms) bonds = self._bonds - if atoms - self._atoms.keys(): + if atoms - bonds.keys(): raise ValueError('invalid atom numbers') nodes = [atoms] for _ in range(deep): @@ -961,81 +498,125 @@ def _augmented_substructure(self, atoms: Iterable[int], deep: int): nodes.append(n) return nodes - def _calc_implicit(self, n: int): + def fix_structure(self, recalculate_hydrogens=True): """ - Set firs possible hydrogens count based on rules + Fix molecule internal representation """ + self.calc_labels() # refresh all labels + + if recalculate_hydrogens: + for n in (self._changed or self._atoms): + self.calc_implicit(n) # fix Hs count + self._changed = None + + def calc_labels(self): atoms = self._atoms - atom = atoms[n] - if (an := atom.atomic_number) == 1: # hydrogen nether has implicit H - self._hydrogens[n] = 0 + atoms_rings_sizes = self.atoms_rings_sizes # expensive: sssr based + atoms_rings = {n: set(r) for n, r in self.atoms_rings.items()} + + for n, m_bond in self._bonds.items(): + neighbors = 0 + heteroatoms = 0 + hybridization = 1 + explicit_hydrogens = 0 + anr = atoms_rings.get(n) or False + for m, bond in m_bond.items(): + bond._in_ring = anr and (amr := atoms_rings.get(m) or False) and not anr.isdisjoint(amr) # have common rings + + if bond == 8: + continue + elif bond == 4: + hybridization = 4 + elif hybridization != 4: + if bond == 3: + hybridization = 3 + elif bond == 2: + if hybridization == 1: + 
hybridization = 2 + elif hybridization == 2: + hybridization = 3 + + neighbors += 1 + if (a := atoms[m]) == H: + explicit_hydrogens += 1 + elif a != C: + heteroatoms += 1 + atom = atoms[n] + atom._neighbors = neighbors + atom._heteroatoms = heteroatoms + atom._hybridization = hybridization + atom._explicit_hydrogens = explicit_hydrogens + + atom._in_ring = n in atoms_rings_sizes + atom._ring_sizes = atoms_rings_sizes.get(n) or set() + + def calc_implicit(self, n: int): + """ + Set firs possible hydrogens count based on rules + """ + if (atom := self._atoms[n]) == H: # hydrogen nether has implicit H + atom._implicit_hydrogens = 0 return - charge: int = self._charges[n] - is_radical = self._radicals[n] explicit_sum = 0 explicit_dict = defaultdict(int) aroma = 0 for m, bond in self._bonds[n].items(): - order = bond.order - if order == 4: # only neutral carbon aromatic rings supported - if not charge and not is_radical and an == 6: + if bond == 4: # only neutral carbon aromatic rings supported + if not atom.charge and not atom.is_radical and atom == C: aroma += 1 else: # use `kekule()` to calculate proper implicit hydrogens count - self._hydrogens[n] = None + atom._implicit_hydrogens = None return - elif order != 8: # any bond used for complexes - explicit_sum += order - explicit_dict[(order, atoms[m].atomic_number)] += 1 + elif bond != 8: # any bond used for complexes + explicit_sum += bond.order + explicit_dict[(bond.order, self._atoms[m].atomic_number)] += 1 if aroma == 2: if explicit_sum == 0: # H-Ar - self._hydrogens[n] = 1 + atom._implicit_hydrogens = 1 elif explicit_sum == 1: # R-Ar - self._hydrogens[n] = 0 + atom._implicit_hydrogens = 0 else: # invalid aromaticity - self._hydrogens[n] = None + atom._implicit_hydrogens = None return elif aroma == 3: # condensed rings if explicit_sum: # invalid aromaticity - self._hydrogens[n] = None + atom._implicit_hydrogens = None else: - self._hydrogens[n] = 0 + atom._implicit_hydrogens = 0 return elif aroma: - 
self._hydrogens[n] = None + atom._implicit_hydrogens = None return try: - rules = atom.valence_rules(charge, is_radical, explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: - self._hydrogens[n] = None + atom._implicit_hydrogens = None return for s, d, h in rules: if s.issubset(explicit_dict) and all(explicit_dict[k] >= c for k, c in d.items()): - self._hydrogens[n] = h + atom._implicit_hydrogens = h return - self._hydrogens[n] = None # rule not found + atom._implicit_hydrogens = None # rule not found - def _check_implicit(self, n: int, h: int) -> bool: - atoms = self._atoms - atom = atoms[n] - if atom.atomic_number == 1: # hydrogen nether has implicit H + def check_implicit(self, n: int, h: int) -> bool: + if (atom := self._atoms[n]) == H: # hydrogen nether has implicit H return h == 0 explicit_sum = 0 explicit_dict = defaultdict(int) for m, bond in self._bonds[n].items(): - order = bond.order - if order == 4: # can't check aromatic rings + if bond == 4: # can't check aromatic rings return False - elif order != 8: # any bond used for complexes - explicit_sum += order - explicit_dict[(order, atoms[m].atomic_number)] += 1 + elif bond != 8: # any bond used for complexes + explicit_sum += bond.order + explicit_dict[(bond.order, self._atoms[m].atomic_number)] += 1 try: - rules = atom.valence_rules(self._charges[n], self._radicals[n], explicit_sum) + rules = atom.valence_rules(explicit_sum) except ValenceError: return False for s, d, _h in rules: @@ -1043,6 +624,19 @@ def _check_implicit(self, n: int, h: int) -> bool: return True return False + def flush_cache(self, *, keep_sssr=False, keep_components=False): + backup = {} + if keep_sssr: + # good to keep if no new bonds or bonds deletions or bonds to/from any change + for k, v in self.__dict__.items(): + if k in ('sssr', 'atoms_rings', 'atoms_rings_sizes', 'not_special_connectivity', 'rings_count'): + backup[k] = v + if keep_components: + # good to keep if no new bonds or bonds deletions + if 
'connected_components' in self.__dict__: + backup['connected_components'] = self.connected_components + self.__dict__ = backup + def __int__(self): """ Total charge of molecule @@ -1080,67 +674,21 @@ def __enter__(self): """ Transaction of changes. Keep current state for restoring on errors. """ - atoms = {} - for n, atom in self._atoms.items(): - atom = atom.copy() - atoms[n] = atom - atom._attach_graph(self, n) - - bonds = {} - for n, m_bond in self._bonds.items(): - bonds[n] = cbn = {} - for m, bond in m_bond.items(): - if m in bonds: # bond partially exists. need back-connection. - cbn[m] = bonds[m][n] - else: - cbn[m] = bond = bond.copy() - bond._attach_graph(self, n, m) - - self._backup = {'atoms': atoms, 'bonds': bonds, 'parsed_mapping': self._parsed_mapping.copy(), - 'plane': self._plane.copy(), 'charges': self._charges.copy(), 'radicals': self._radicals.copy(), - 'hydrogens': self._hydrogens.copy(), 'conformers': [x.copy() for x in self._conformers], - 'atoms_stereo': self._atoms_stereo.copy(), 'allenes_stereo': self._allenes_stereo.copy(), - 'cis_trans_stereo': self._cis_trans_stereo.copy()} + self._backup = self.copy(keep_sssr=True, keep_components=True) return self def __exit__(self, exc_type, exc_val, exc_tb): if exc_type: # restore state backup = self._backup - self._atoms = backup['atoms'] - self._bonds = backup['bonds'] - self._parsed_mapping = backup['parsed_mapping'] - self._plane = backup['plane'] - self._charges = backup['charges'] - self._radicals = backup['radicals'] - self._hydrogens = backup['hydrogens'] - self._conformers = backup['conformers'] - self._atoms_stereo = backup['atoms_stereo'] - self._allenes_stereo = backup['allenes_stereo'] - self._cis_trans_stereo = backup['cis_trans_stereo'] - self.flush_cache() - del self._backup - - def __getstate__(self): - return {'conformers': self._conformers, 'hydrogens': self._hydrogens, 'atoms_stereo': self._atoms_stereo, - 'allenes_stereo': self._allenes_stereo, 'cis_trans_stereo': 
self._cis_trans_stereo, - 'parsed_mapping': self._parsed_mapping, 'meta': self.__meta, 'name': self.__name, - 'plane': self._plane, **super().__getstate__()} - - def __setstate__(self, state): - super().__setstate__(state) - self._conformers = state['conformers'] - self._atoms_stereo = state['atoms_stereo'] - self._allenes_stereo = state['allenes_stereo'] - self._cis_trans_stereo = state['cis_trans_stereo'] - self._hydrogens = state['hydrogens'] - self._parsed_mapping = state['parsed_mapping'] - self._plane = state['plane'] - self.__meta = state['meta'] - self.__name = state['name'] - - # attach bonds to graph - for n, m, b in self.bonds(): - b._attach_graph(self, n, m) + self._atoms = backup._atoms + self._bonds = backup._bonds + self._meta = backup._meta + self._name = backup._name + self.__dict__ = backup.__dict__ + else: # update internal state + self.fix_structure() + self.fix_stereo() + self._backup = None # drop backup __all__ = ['MoleculeContainer'] diff --git a/chython/containers/query.py b/chython/containers/query.py index abe4dcaf..391bd452 100644 --- a/chython/containers/query.py +++ b/chython/containers/query.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,314 +16,51 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from itertools import chain, product -from typing import Dict, List, Tuple, Union +from typing import Tuple, Union from .bonds import Bond, QueryBond from .graph import Graph from ..algorithms.isomorphism import QueryIsomorphism -from ..algorithms.smiles import QuerySmiles -from ..algorithms.stereo import Stereo -from ..periodictable import Element, ListElement, QueryElement -from ..periodictable.element import Query +from ..periodictable import Element, QueryElement +from ..periodictable.base import Query -def _validate_neighbors(neighbors): - if neighbors is None: - neighbors = () - elif isinstance(neighbors, int): - if neighbors < 0 or neighbors > 14: - raise ValueError('neighbors should be in range [0, 14]') - neighbors = (neighbors,) - elif isinstance(neighbors, (tuple, list)): - if not all(isinstance(n, int) for n in neighbors): - raise TypeError('neighbors should be list or tuple of ints') - if any(n < 0 or n > 14 for n in neighbors): - raise ValueError('neighbors should be in range [0, 14]') - if len(set(neighbors)) != len(neighbors): - raise ValueError('neighbors should be unique') - neighbors = tuple(sorted(neighbors)) - else: - raise TypeError('neighbors should be int or list or tuple of ints') - return neighbors +class QueryContainer(Graph[Query, QueryBond], QueryIsomorphism): + __slots__ = ('_smarts',) - -class QueryContainer(Stereo, Graph[Query, QueryBond], QueryIsomorphism, QuerySmiles): - __slots__ = ('_neighbors', '_hybridizations', '_hydrogens', '_rings_sizes', '_heteroatoms', '_masked') - - _neighbors: Dict[int, Tuple[int, ...]] - _hybridizations: Dict[int, Tuple[int, ...]] - _hydrogens: Dict[int, Tuple[int, ...]] - _rings_sizes: Dict[int, Tuple[int, ...]] - _heteroatoms: Dict[int, Tuple[int, ...]] - _masked: Dict[int, bool] - - def __init__(self): + def __init__(self, smarts: str): super().__init__() - self._neighbors = {} - self._hybridizations = {} - self._hydrogens = {} - self._rings_sizes = {} - self._heteroatoms = {} - self._masked = {} 
- - def add_atom(self, atom: Union[Query, Element, int, str], *args, - neighbors: Union[int, List[int], Tuple[int, ...], None] = None, - hybridization: Union[int, List[int], Tuple[int, ...], None] = None, - hydrogens: Union[int, List[int], Tuple[int, ...], None] = None, - rings_sizes: Union[int, List[int], Tuple[int, ...], None] = None, - heteroatoms: Union[int, List[int], Tuple[int, ...], None] = None, - masked: bool = False, **kwargs): - if hybridization is None: - hybridization = () - elif isinstance(hybridization, int): - if hybridization < 1 or hybridization > 4: - raise ValueError('hybridization should be in range [1, 4]') - hybridization = (hybridization,) - elif isinstance(hybridization, (tuple, list)): - if not all(isinstance(h, int) for h in hybridization): - raise TypeError('hybridizations should be list or tuple of ints') - if any(h < 1 or h > 4 for h in hybridization): - raise ValueError('hybridizations should be in range [1, 4]') - if len(set(hybridization)) != len(hybridization): - raise ValueError('hybridizations should be unique') - hybridization = tuple(sorted(hybridization)) - else: - raise TypeError('hybridization should be int or list or tuple of ints') + self._smarts = smarts - if rings_sizes is None: - rings_sizes = () - elif isinstance(rings_sizes, int): - if rings_sizes < 3 and rings_sizes != 0: - raise ValueError('rings should be greater or equal 3. 
ring equal to zero is no ring atom mark') - rings_sizes = (rings_sizes,) - elif isinstance(rings_sizes, (tuple, list)): - if not all(isinstance(n, int) for n in rings_sizes): - raise TypeError('rings should be list or tuple of ints') - if any(n < 3 for n in rings_sizes): - raise ValueError('rings should be greater or equal 3') - if len(set(rings_sizes)) != len(rings_sizes): - raise ValueError('rings should be unique') - rings_sizes = tuple(sorted(rings_sizes)) - else: - raise TypeError('rings should be int or list or tuple of ints') + def __str__(self): + return self._smarts - neighbors = _validate_neighbors(neighbors) - hydrogens = _validate_neighbors(hydrogens) - heteroatoms = _validate_neighbors(heteroatoms) + def __repr__(self): + return f'smarts({self._smarts})' + def add_atom(self, atom: Union[Query, Element, int, str], *args, **kwargs): if not isinstance(atom, Query): + # set only basic labels: charge, radical, isotope. use Query object directly for the full control. if isinstance(atom, Element): - atom = QueryElement.from_atomic_number(atom.atomic_number)(atom.isotope) + atom = QueryElement.from_atom(atom) elif isinstance(atom, str): atom = QueryElement.from_symbol(atom)() elif isinstance(atom, int): atom = QueryElement.from_atomic_number(atom)() else: raise TypeError('QueryElement object expected') - - n = super().add_atom(atom, *args, **kwargs) - self._neighbors[n] = neighbors - self._hybridizations[n] = hybridization - self._hydrogens[n] = hydrogens - self._rings_sizes[n] = rings_sizes - self._heteroatoms[n] = heteroatoms - self._masked[n] = masked - return n + return super().add_atom(atom, *args, **kwargs) def add_bond(self, n, m, bond: Union[QueryBond, Bond, int, Tuple[int, ...]]): if isinstance(bond, Bond): bond = QueryBond.from_bond(bond) elif not isinstance(bond, QueryBond): bond = QueryBond(bond) - - sct = self._stereo_cis_trans_paths # save - sa = self._stereo_allenes_paths - super().add_bond(n, m, bond) - # remove stereo marks on bonded atoms and 
all its bonds - if n in self._atoms_stereo: - del self._atoms_stereo[n] - if m in self._atoms_stereo: - del self._atoms_stereo[m] - if self._cis_trans_stereo: - for nm, path in sct.items(): - if (n in path or m in path) and nm in self._cis_trans_stereo: - del self._cis_trans_stereo[nm] - if self._allenes_stereo: - for c, path in sa.items(): - if (n in path or m in path) and c in self._allenes_stereo: - del self._allenes_stereo[c] - - def copy(self) -> 'QueryContainer': - copy = super().copy() - - copy._bonds = cb = {} - for n, m_bond in self._bonds.items(): - cb[n] = cbn = {} - for m, bond in m_bond.items(): - if m in cb: # bond partially exists. need back-connection. - cbn[m] = cb[m][n] - else: - cbn[m] = bond.copy() - - copy._neighbors = self._neighbors.copy() - copy._hybridizations = self._hybridizations.copy() - copy._hydrogens = self._hydrogens.copy() - copy._heteroatoms = self._heteroatoms.copy() - copy._rings_sizes = self._rings_sizes.copy() - copy._atoms_stereo = self._atoms_stereo.copy() - copy._allenes_stereo = self._allenes_stereo.copy() - copy._cis_trans_stereo = self._cis_trans_stereo.copy() - copy._masked = self._masked.copy() - return copy def union(self, other: 'QueryContainer', *, remap: bool = False, copy: bool = True) -> 'QueryContainer': if not isinstance(other, QueryContainer): raise TypeError('QueryContainer expected') - u, o = super().union(other, remap=remap, copy=copy) - - ub = u._bonds - for n, m_bond in o._bonds.items(): - ub[n] = ubn = {} - for m, bond in m_bond.items(): - if m in ub: # bond partially exists. need back-connection. 
- ubn[m] = ub[m][n] - else: - ubn[m] = bond.copy() - - u._neighbors.update(o._neighbors) - u._hybridizations.update(o._hybridizations) - u._hydrogens.update(o._hydrogens) - u._rings_sizes.update(o._rings_sizes) - u._heteroatoms.update(o._heteroatoms) - u._masked.update(o._masked) - return u - - def remap(self, mapping: Dict[int, int], *, copy=False) -> 'QueryContainer': - atoms = self._atoms # keep original atoms dict - h = super().remap(mapping, copy=copy) - - mg = mapping.get - hydrogens = self._hydrogens - neighbors = self._neighbors - hybridizations = self._hybridizations - heteroatoms = self._heteroatoms - rings_sizes = self._rings_sizes - masked = self._masked - - if copy: - hb = h._bonds - hhg = h._hydrogens - hn = h._neighbors - hh = h._hybridizations - hx = h._heteroatoms - hrs = h._rings_sizes - hm = h._masked - - # deep copy of bonds - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond.copy() - else: - hb = {} - hhg = {} - hn = {} - hh = {} - hx = {} - hrs = {} - hm = {} - - for n, m_bond in self._bonds.items(): - n = mg(n, n) - hb[n] = hbn = {} - for m, bond in m_bond.items(): - m = mg(m, m) - if m in hb: # bond partially exists. need back-connection. - hbn[m] = hb[m][n] - else: - hbn[m] = bond - - for n in atoms: - m = mg(n, n) - hhg[m] = hydrogens[n] - hn[m] = neighbors[n] - hh[m] = hybridizations[n] - hx[m] = heteroatoms[n] - hrs[m] = rings_sizes[n] - hm[m] = masked[n] - - if copy: - return h # noqa - - self._bonds = hb - self._hydrogens = hhg - self._neighbors = hn - self._hybridizations = hh - self._heteroatoms = hx - self._rings_sizes = hrs - self._masked = hm - return self - - def enumerate_queries(self, *, enumerate_marks: bool = False): - """ - Enumerate complex queries into multiple simple ones. For example `[N,O]-C` into `NC` and `OC`. 
- - :param enumerate_marks: enumerate multiple marks to separate queries - """ - atoms = [(n, a._numbers) for n, a in self._atoms.items() if isinstance(a, ListElement)] - bonds = [(n, m, b.order) for n, m, b in self.bonds() if len(b.order) > 1] - for combo in product(*(x for *_, x in chain(atoms, bonds))): - copy = self.copy() - for (n, _), a in zip(atoms, combo): - copy._atoms[n] = a = QueryElement.from_atomic_number(a)() - a._attach_graph(copy, n) - for (n, m, _), b in zip(bonds, combo[len(atoms):]): - copy._bonds[n][m]._QueryBond__order = (b,) # noqa - - if enumerate_marks: - c = 0 - slices = [] - data = [] - for attr in ('_neighbors', '_hybridizations', '_hydrogens', '_heteroatoms', '_rings_sizes'): - tmp = [(n, v) for n, v in getattr(self, attr).items() if len(v) > 1] - if tmp: - data.extend(tmp) - slices.append((attr, c, c + len(tmp))) - c += len(tmp) - - for combo2 in product(*(x for _, x in data)): - copy2 = copy.copy() - for attr, i, j in slices: - attr = getattr(copy2, attr) - for (n, _), v in zip(data[i: j], combo2[i: j]): - attr[n] = (v,) - yield copy2 - else: - yield copy - - def __getstate__(self): - return {'atoms_stereo': self._atoms_stereo, 'allenes_stereo': self._allenes_stereo, - 'cis_trans_stereo': self._cis_trans_stereo, 'neighbors': self._neighbors, - 'hybridizations': self._hybridizations, 'hydrogens': self._hydrogens, 'masked': self._masked, - 'rings_sizes': self._rings_sizes, 'heteroatoms': self._heteroatoms, **super().__getstate__()} - - def __setstate__(self, state): - super().__setstate__(state) - self._atoms_stereo = state['atoms_stereo'] - self._allenes_stereo = state['allenes_stereo'] - self._cis_trans_stereo = state['cis_trans_stereo'] - self._neighbors = state['neighbors'] - self._hybridizations = state['hybridizations'] - self._hydrogens = state['hydrogens'] - self._rings_sizes = state['rings_sizes'] - self._heteroatoms = state['heteroatoms'] - self._masked = state['masked'] + return super().union(other, remap=remap, copy=copy) 
__all__ = ['QueryContainer'] diff --git a/chython/containers/reaction.py b/chython/containers/reaction.py index bbb6509f..ab55da04 100644 --- a/chython/containers/reaction.py +++ b/chython/containers/reaction.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2022 Ramil Nugmanov +# Copyright 2017-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,11 +18,10 @@ # from CachedMethods import cached_method from functools import reduce -from hashlib import sha512 from itertools import chain from math import ceil from operator import itemgetter, or_ -from typing import Dict, Iterable, Iterator, Optional, Tuple, List +from typing import Dict, Iterator, Optional, Tuple, List, Sequence from zlib import compress, decompress from .cgr import CGRContainer from .molecule import MoleculeContainer @@ -38,11 +37,10 @@ class ReactionContainer(StandardizeReaction, Mapping, Calculate2DReaction, Depic Reaction storage hashable and comparable. based on reaction unique signature (SMILES). 
""" - __slots__ = ('__reactants', '__products', '__reagents', '__meta', '__name', '_arrow', '_signs', '__dict__') - __class_cache__ = {} + __slots__ = ('_reactants', '_products', '_reagents', '_meta', '_name', '_arrow', '_signs', '__dict__') - def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterable[MoleculeContainer] = (), - reagents: Iterable[MoleculeContainer] = (), meta: Optional[Dict] = None, name: Optional[str] = None): + def __init__(self, reactants: Sequence[MoleculeContainer] = (), products: Sequence[MoleculeContainer] = (), + reagents: Sequence[MoleculeContainer] = (), meta: Optional[Dict] = None, name: Optional[str] = None): """ New reaction object creation @@ -58,17 +56,17 @@ def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterab if not reactants and not products and not reagents: raise ValueError('At least one graph object required') elif not all(isinstance(x, MoleculeContainer) for x in chain(reactants, products, reagents)): - raise TypeError(f'MoleculeContainers expected') + raise TypeError('MoleculeContainers expected') - self.__reactants = reactants - self.__products = products - self.__reagents = reagents + self._reactants = reactants + self._products = products + self._reagents = reagents if meta is None: - self.__meta = None + self._meta = None else: - self.__meta = dict(meta) + self._meta = dict(meta) if name is None: - self.__name = None + self._name = None else: self.name = name self._arrow = None @@ -76,21 +74,21 @@ def __init__(self, reactants: Iterable[MoleculeContainer] = (), products: Iterab @property def reactants(self) -> Tuple[MoleculeContainer, ...]: - return self.__reactants + return self._reactants @property def reagents(self) -> Tuple[MoleculeContainer, ...]: - return self.__reagents + return self._reagents @property def products(self) -> Tuple[MoleculeContainer, ...]: - return self.__products + return self._products def molecules(self) -> Iterator[MoleculeContainer]: """ 
Iterator of all reaction molecules """ - return chain(self.__reactants, self.__reagents, self.__products) + return chain(self.reactants, self.reagents, self.products) @property def meta(self) -> Dict: @@ -98,33 +96,33 @@ def meta(self) -> Dict: Dictionary of metadata. Like DTYPE-DATUM in RDF """ - if self.__meta is None: - self.__meta = {} # lazy - return self.__meta + if self._meta is None: + self._meta = {} # lazy + return self._meta @property def name(self) -> str: - return self.__name or '' + return self._name or '' @name.setter def name(self, name: str): if not isinstance(name, str): raise TypeError('name should be string up to 80 symbols') - self.__name = name + self._name = name def copy(self) -> 'ReactionContainer': """ Get copy of object """ copy = object.__new__(self.__class__) - copy._ReactionContainer__reactants = tuple(x.copy() for x in self.__reactants) - copy._ReactionContainer__products = tuple(x.copy() for x in self.__products) - copy._ReactionContainer__reagents = tuple(x.copy() for x in self.__reagents) - copy._ReactionContainer__name = self.__name - if self.__meta is None: - copy._ReactionContainer__meta = None + copy._reactants = tuple(x.copy() for x in self.reactants) + copy._products = tuple(x.copy() for x in self.products) + copy._reagents = tuple(x.copy() for x in self.reagents) + copy._name = self._name + if self._meta is None: + copy._meta = None else: - copy._ReactionContainer__meta = self.__meta.copy() + copy._meta = self._meta.copy() copy._arrow = self._arrow copy._signs = self._signs return copy @@ -137,23 +135,24 @@ def compose(self) -> CGRContainer: Reagents will be presented as unchanged molecules :return: CGRContainer """ - rr = self.__reagents + self.__reactants + rr = self.reagents + self.reactants if rr: r = reduce(or_, rr) else: r = MoleculeContainer() - if self.__products: - p = reduce(or_, self.__products) + if self.products: + p = reduce(or_, self.products) else: p = MoleculeContainer() return r ^ p - def flush_cache(self): 
+ def flush_cache(self, keep_molecule_cache=False, **kwargs): self.__dict__.clear() - for m in self.molecules(): - m.flush_cache() + if not keep_molecule_cache: + for m in self.molecules(): + m.flush_cache(**kwargs) - def pack(self, *, compressed=True, check=True): + def pack(self, *, compressed=True, check=True) -> bytes: """ Pack into compressed bytes. @@ -172,12 +171,18 @@ def pack(self, *, compressed=True, check=True): :param compressed: return zlib-compressed pack. :param check: check molecules for format restrictions. """ - data = b''.join((bytearray((1, len(self.__reactants), len(self.__reagents), len(self.__products))), + data = b''.join((bytearray((1, len(self.reactants), len(self.reagents), len(self.products))), *(m.pack(compressed=False, check=check) for m in self.molecules()))) if compressed: return compress(data, 9) return data + def pach(self, *, compressed=True, check=True) -> bytes: + """ + Pack into compressed bytes. + """ + return self.pack(compressed=compressed, check=check) + @classmethod def pack_len(cls, data: bytes, /, *, compressed=True) -> Tuple[List[int], List[int], List[int]]: """ @@ -225,7 +230,7 @@ def unpack(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': raise ValueError('invalid pack header') reactants, reagents, products = data[1], data[2], data[3] - molecules = [] + molecules: List[MoleculeContainer] = [] shift = 4 for _ in range(reactants + reagents + products): m, pl = MoleculeContainer.unpack(data[shift:], compressed=False, _return_pack_length=True) @@ -233,6 +238,16 @@ def unpack(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': shift += pl return cls(molecules[:reactants], molecules[-products:], molecules[reactants: -products]) + @classmethod + def unpach(cls, data: bytes, /, *, compressed=True) -> 'ReactionContainer': + """ + Unpack from compressed bytes. 
+ """ + return cls.unpack(data, compressed=compressed) + + def __bytes__(self): + return self.pack() + def __invert__(self) -> CGRContainer: """ Get CGR of reaction @@ -246,15 +261,11 @@ def __eq__(self, other): def __hash__(self): return hash(str(self)) - @cached_method - def __bytes__(self): - return sha512(str(self).encode()).digest() - def __bool__(self): """ Exists both reactants and products """ - return bool(self.__reactants and self.__products) + return bool(self.reactants and self.products) @cached_method def __str__(self): @@ -277,9 +288,9 @@ def __format__(self, format_spec): sig = [] count = 0 contract = [] - orders = [] + radicals = [] - for ml in (self.__reactants, self.__reagents, self.__products): + for ml in (self.reactants, self.reagents, self.products): mso = [(m, *m.__format__(format_spec, _return_order=True)) for m in ml] if not format_spec or '!c' not in format_spec: mso.sort(key=itemgetter(1)) @@ -292,13 +303,13 @@ def __format__(self, format_spec): else: count += 1 - orders.append((m, o)) + radicals.extend(m.atom(n).is_radical for n in o) ss.append(s) sig.append('.'.join(ss)) if not format_spec or '!x' not in format_spec: cx = [] - if r := ','.join(str(n) for n, (m, a) in enumerate((m, a) for m, o in orders for a in o) if m._radicals[a]): + if r := ','.join(str(n) for n, r in enumerate(radicals) if r): cx.append(f'^1:{r}') if contract: cx.append(f"f:{','.join('.'.join(x) for x in contract)}") @@ -306,29 +317,8 @@ def __format__(self, format_spec): return f"{'>'.join(sig)} |{','.join(cx)}|" return '>'.join(sig) - @cached_method def __len__(self): - return len(self.__reactants) + len(self.__products) + len(self.__reagents) - - def __getstate__(self): - state = {'reactants': self.__reactants, 'products': self.__products, 'reagents': self.__reagents, - 'meta': self.__meta, 'name': self.__name, 'arrow': self._arrow, 'signs': self._signs} - from chython import pickle_cache - - if pickle_cache: - state['cache'] = self.__dict__ - return state - - 
def __setstate__(self, state): - self.__reactants = state['reactants'] - self.__products = state['products'] - self.__reagents = state['reagents'] - self.__meta = state['meta'] - self.__name = state['name'] - self._arrow = state['arrow'] - self._signs = state['signs'] - if 'cache' in state: - self.__dict__.update(state['cache']) + return len(self.reactants) + len(self.products) + len(self.reagents) __all__ = ['ReactionContainer'] diff --git a/chython/exceptions.py b/chython/exceptions.py index 891340fc..6f47d503 100644 --- a/chython/exceptions.py +++ b/chython/exceptions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -66,30 +66,6 @@ class InvalidAromaticRing(ValueError): """ -class IsConnectedAtom(Exception): - """ - Atom is already attached to graph - """ - - -class IsNotConnectedAtom(Exception): - """ - Atom is not attached to graph - """ - - -class IsConnectedBond(Exception): - """ - Bond is already attached to graph - """ - - -class IsNotConnectedBond(Exception): - """ - Bond is not attached to graph - """ - - class ValenceError(Exception): """ Atom has valence error diff --git a/chython/files/MRVrw.py b/chython/files/MRVrw.py index c8db572a..543f33dd 100644 --- a/chython/files/MRVrw.py +++ b/chython/files/MRVrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -24,12 +24,11 @@ from typing import Union, List, Iterator, Dict, Optional from ._convert import create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction -from ._mdl import postprocess_molecule +from .mdl import postprocess_molecule from ..containers import MoleculeContainer, ReactionContainer from ..exceptions import EmptyMolecule, EmptyReaction -organic_set = {'B', 'C', 'N', 'O', 'P', 'S', 'Se', 'F', 'Cl', 'Br', 'I'} bond_map = {8: '1" queryType="Any', 4: 'A', 1: '1', 2: '2', 3: '3', 'Any': 8, 'any': 8, 'A': 4, 'a': 4, '1': 1, '2': 2, '3': 3} @@ -138,14 +137,14 @@ def read_structure(self, *, current: bool = True): postprocess_parsed_molecule(tmp, remap=self.__remap, ignore=self.__ignore) parse_sgroup(data, tmp) mol = create_molecule(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self.__ignore, ignore_stereo=self.__ignore_stereo, - calc_cis_trans=self.__calc_cis_trans) - mol.meta.update(meta) + if not self.__ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) + if meta: + mol.meta.update(meta) return mol elif 'reaction' in data and isinstance(data['reaction'], dict): data = data['reaction'] - tmp = {'reactants': [], 'products': [], 'reagents': [], - 'meta': None, 'log': log, 'title': data.get('@title')} + tmp = {'reactants': [], 'products': [], 'reagents': [], 'log': log, 'title': data.get('@title')} n = 0 for tag, group in (('reactantList', 'reactants'), ('productList', 'products'), ('agentList', 'reagents')): @@ -171,10 +170,11 @@ def read_structure(self, *, current: bool = True): postprocess_parsed_reaction(tmp, remap=self.__remap, ignore=self.__ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=self.__ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) - for mol, tmp in zip(rxn.molecules(), 
chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=self.__ignore, ignore_stereo=self.__ignore_stereo, - calc_cis_trans=self.__calc_cis_trans) - rxn.meta.update(meta) + if not self.__ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=self.__calc_cis_trans) + if meta: + rxn.meta.update(meta) return rxn else: raise ValueError('reaction or molecule expected') @@ -263,7 +263,6 @@ def _read_block(self, *, current: bool = True) -> dict: def parse_molecule(data): atoms, bonds, stereo = [], [], [] log = [] - hydrogens = {} atom_map = {} if 'atom' in data['atomArray']: da = data['atomArray']['atom'] @@ -275,20 +274,20 @@ def parse_molecule(data): 'isotope': int(atom['@isotope']) if '@isotope' in atom else None, 'charge': int(atom.get('@formalCharge', 0)), 'is_radical': '@radical' in atom, - 'mapping': int(atom.get('@mrvMap', 0))}) + 'parsed_mapping': int(atom.get('@mrvMap', 0))}) if '@z3' in atom: atoms[-1].update(x=float(atom['@x3']), y=float(atom['@y3']), z=float(atom['@z3'])) else: - atoms[-1].update(x=float(atom['@x2']) / 2, y=float(atom['@y2']) / 2, z=0.) 
+ atoms[-1].update(x=float(atom['@x2']) / 2, y=float(atom['@y2']) / 2) if '@mrvQueryProps' in atom: raise ValueError('queries unsupported') if '@hydrogenCount' in atom: - hydrogens[n] = int(atom['@hydrogenCount']) + atoms[-1]['implicit_hydrogens'] = int(atom['@hydrogenCount']) else: atom = data['atomArray'] for n, (_id, e) in enumerate(zip(atom['@atomID'].split(), atom['@elementType'].split())): atom_map[_id] = n - atoms.append({'element': e, 'charge': 0, 'mapping': 0, 'isotope': None, 'is_radical': False}) + atoms.append({'element': e}) if '@z3' in atom: for a, x, y, z in zip(atoms, atom['@x3'].split(), atom['@y3'].split(), atom['@z3'].split()): a['x'] = float(x) @@ -298,7 +297,6 @@ def parse_molecule(data): for a, x, y in zip(atoms, atom['@x2'].split(), atom['@y2'].split()): a['x'] = float(x) / 2 a['y'] = float(y) / 2 - a['z'] = 0. if '@isotope' in atom: for a, x in zip(atoms, atom['@isotope'].split()): if x != '0': @@ -310,7 +308,7 @@ def parse_molecule(data): if '@mrvMap' in atom: for a, x in zip(atoms, atom['@mrvMap'].split()): if x != '0': - a['mapping'] = int(x) + a['parsed_mapping'] = int(x) if '@radical' in atom: for a, x in zip(atoms, atom['@radical'].split()): if x != '0': @@ -340,8 +338,8 @@ def parse_molecule(data): log.append('incorrect bondStereo tag') bonds.append((atom_map[a1], atom_map[a2], order)) - return {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': None, 'title': data.get('@title'), 'log': log, 'atom_map': atom_map} + return {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, + 'title': data.get('@title'), 'log': log, 'atom_map': atom_map} def parse_sgroup(data, molecule): @@ -486,30 +484,24 @@ def __write(self, data): file.write('\n') def __write_molecule(self, g): - gp = g._plane - gc = g._charges - gr = g._radicals bg = g._bonds - hg = g._hydrogens - hb = g.hybridization mapping = self.__mapping file = self.__file file.write('') - for n, atom in g._atoms.items(): - x, y = gp[n] - ih = hg[n] + for n, 
atom in g.atoms(): + x, y = atom.x, atom.y file.write(f'') file.write('') diff --git a/chython/files/PDBrw.py b/chython/files/PDBrw.py index a761e3cb..01ad869f 100644 --- a/chython/files/PDBrw.py +++ b/chython/files/PDBrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -178,6 +178,8 @@ def read_structure(self, *, current: bool = True) -> MoleculeContainer: atom_charge=charges, _cls=self.molecule_cls) mol.meta['RESIDUE'] = dict(enumerate(res, 1)) + if log: + mol.meta['chython_parsing_log'] = log if self.__parse_as_single: self.__parsed_first = mol.copy() return mol @@ -191,6 +193,11 @@ def read_structure(self, *, current: bool = True) -> MoleculeContainer: c[n] = (x, y, z) mol = self.__parsed_first.copy() mol._conformers[0] = c + if log: + if 'chython_parsing_log' in mol.meta: + mol.meta['chython_parsing_log'] = mol.meta['chython_parsing_log'] + log + else: + mol.meta['chython_parsing_log'] = log return mol def close(self, force: bool = False): diff --git a/chython/files/RDFrw.py b/chython/files/RDFrw.py index 0d4475bc..9e8a20f2 100644 --- a/chython/files/RDFrw.py +++ b/chython/files/RDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # Copyright 2019 Dinar Batyrshin # This file is part of chython. 
# @@ -25,8 +25,8 @@ from sys import platform from time import strftime from typing import Union, Dict, List -from ._mdl import (MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, parse_rxn_v2000, parse_rxn_v3000, - postprocess_molecule) +from .mdl import (MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, parse_rxn_v2000, parse_rxn_v3000, + postprocess_molecule) from ._convert import create_molecule, create_reaction from ._mapping import postprocess_parsed_molecule, postprocess_parsed_reaction from ..containers import ReactionContainer, MoleculeContainer @@ -74,9 +74,9 @@ def read_structure(self, *, current=True) -> Union[ReactionContainer, MoleculeCo postprocess_parsed_reaction(tmp, remap=self._remap, ignore=self._ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _m_cls=self.molecule_cls, _r_cls=self.reaction_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) if meta: rxn.meta.update(meta) return rxn @@ -87,8 +87,8 @@ def read_structure(self, *, current=True) -> Union[ReactionContainer, MoleculeCo postprocess_parsed_molecule(tmp) mol = create_molecule(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) if meta: mol.meta.update(meta) return mol @@ -289,9 +289,9 @@ def mdl_rxn(data: str, /, *, ignore=True, calc_cis_trans=False, ignore_stereo=Fa postprocess_parsed_reaction(tmp, remap=remap, 
ignore=ignore) rxn = create_reaction(tmp, ignore_bad_isotopes=ignore_bad_isotopes, _m_cls=_m_cls, _r_cls=_r_cls) - for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - calc_cis_trans=calc_cis_trans) + if not ignore_stereo: + for mol, tmp in zip(rxn.molecules(), chain(tmp['reactants'], tmp['reagents'], tmp['products'])): + postprocess_molecule(mol, tmp, calc_cis_trans=calc_cis_trans) return rxn diff --git a/chython/files/SDFrw.py b/chython/files/SDFrw.py index 6ef8e638..232f3fe6 100644 --- a/chython/files/SDFrw.py +++ b/chython/files/SDFrw.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -23,7 +23,7 @@ from subprocess import check_output from sys import platform from typing import Optional, List -from ._mdl import MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, postprocess_molecule +from .mdl import MDLRead, MOLWrite, EMOLWrite, parse_mol_v2000, parse_mol_v3000, postprocess_molecule from ._convert import create_molecule from ._mapping import postprocess_parsed_molecule from ..containers import MoleculeContainer @@ -71,8 +71,8 @@ def read_structure(self, *, current=True) -> MoleculeContainer: postprocess_parsed_molecule(tmp, remap=self._remap, ignore=self._ignore) mol = create_molecule(tmp, ignore_bad_isotopes=self._ignore_bad_isotopes, _cls=self.molecule_cls) - postprocess_molecule(mol, tmp, ignore=self._ignore, ignore_stereo=self._ignore_stereo, - calc_cis_trans=self._calc_cis_trans) + if not self._ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=self._calc_cis_trans) meta = self.read_metadata() if meta: mol.meta.update(meta) @@ -213,8 +213,8 @@ def mdl_mol(data: str, /, *, ignore=True, calc_cis_trans=False, ignore_stereo=Fa 
postprocess_parsed_molecule(tmp, remap=remap, ignore=ignore) mol = create_molecule(tmp, ignore_bad_isotopes=ignore_bad_isotopes, _cls=_cls) - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - calc_cis_trans=calc_cis_trans) + if not ignore_stereo: + postprocess_molecule(mol, tmp, calc_cis_trans=calc_cis_trans) return mol diff --git a/chython/files/_convert.py b/chython/files/_convert.py index 2de1ff2b..e25a93cd 100644 --- a/chython/files/_convert.py +++ b/chython/files/_convert.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2023 Ramil Nugmanov +# Copyright 2023, 2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -22,31 +22,38 @@ from ..periodictable import Element -def create_molecule(data, *, skip_calc_implicit=False, ignore_bad_isotopes=False, _cls=MoleculeContainer): - g = object.__new__(_cls) - pm = {} - atoms = {} - plane = {} - charges = {} - radicals = {} - bonds = {} +# atomic number constants +B = 5 +C = 6 +N = 7 +P = 15 + + +def create_molecule(data, *, ignore_bad_isotopes=False, skip_calc_implicit=False, + keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, ignore=True, + ignore_carbon_radicals=False, _cls=MoleculeContainer): + g = _cls() + g._name = data.get('title') + atoms = g._atoms + bonds = g._bonds mapping = data['mapping'] - for n, atom in enumerate(data['atoms']): - n = mapping[n] - e = Element.from_symbol(atom['element']) + + if any(a.get('z') for a in data['atoms']): + # store conformer + g._conformers = [{n: (a['x'], a['y'], a['z']) for n, a in zip(mapping, data['atoms'])}] + + for n, atom in zip(mapping, data['atoms']): + e = Element.from_symbol(atom.pop('element')) + atom.pop('z', None) # clean up MDL try: - atoms[n] = e(atom['isotope']) - except ValueError: + atoms[n] = e(**atom) + except (ValueError, TypeError): if not ignore_bad_isotopes: raise - atoms[n] = e() # reset isotope mark on errors. 
+ del atom['isotope'] # reset isotope mark on errors and try again. + atoms[n] = e(**atom) bonds[n] = {} - if (charge := atom['charge']) > 4 or charge < -4: - raise ValueError('formal charge should be in range [-4, 4]') - charges[n] = charge - radicals[n] = atom['is_radical'] - plane[n] = (atom['x'], atom['y']) - pm[n] = atom['mapping'] + for n, m, b in data['bonds']: n, m = mapping[n], mapping[m] if n == m: @@ -56,27 +63,117 @@ def create_molecule(data, *, skip_calc_implicit=False, ignore_bad_isotopes=False if n in bonds[m]: raise ValueError('atoms already bonded') bonds[n][m] = bonds[m][n] = Bond(b) - if any(a['z'] for a in data['atoms']): - conformers = [{mapping[n]: (a['x'], a['y'], a['z']) for n, a in enumerate(data['atoms'])}] - else: - conformers = [] - if data['log']: # store log to the meta - if data['meta'] is None: + g.calc_labels() # set all labels except rings + + if data.get('log'): # store log to the meta + if data.get('meta') is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] + g._meta = data.get('meta') or None + + if skip_calc_implicit: # don't calc Hs. e.g. INCHI + return g + + implicit_mismatch = {} + radicalized = [] + # precalculate Hs + for n, a in atoms.items(): + if a.implicit_hydrogens is None: + # let's try to calculate. in case of errors just keep as is. radicals in smiles should be in [brackets], + # thus has implicit Hs value + g.calc_implicit(n) + elif keep_implicit: + # keep given Hs count as is + continue + else: # recheck given Hs count + h = a.implicit_hydrogens # parsed Hs + g.calc_implicit(n) # recalculate + if a.implicit_hydrogens is None: # atom has invalid valence or aromatic ring. + if a.hybridization == 4: + # this is aromatic ring. just restore given H count. 
+ a._implicit_hydrogens = h + # rare H0 case + if (not keep_radicals and not ignore_aromatic_radicals + and not h and not a.charge and not a.is_radical and a in (B, C, N, P) + and sum(b != 8 for b in bonds[n].values()) == 2): + # c[c]c - aromatic B,C,N,P radical + a._is_radical = True + radicalized.append(n) + elif not keep_radicals and not a.is_radical: # CXSMILES radical not set. + # SMILES doesn't code radicals. so, let's try to guess. + a._is_radical = True + if g.check_implicit(n, h): # radical form is valid + radicalized.append(n) + a._implicit_hydrogens = h + elif ignore: # radical state also has errors. + a._is_radical = False # reset radical state + implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif h != a.implicit_hydrogens: # H count mismatch. + if a.hybridization == 4: + if (not keep_radicals + and not h and not a.charge and not a.is_radical and a in (B, C, N, P) + and sum(b != 8 for b in bonds[n].values()) == 2): + # c[c]c - aromatic B,C,N,P radical + a._implicit_hydrogens = 0 + a._is_radical = True + radicalized.append(n) + elif ignore: + implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif g.check_implicit(n, h): # set another possible implicit state. probably Al, P + a._implicit_hydrogens = h + elif not keep_radicals and not a.is_radical: # CXSMILES radical is not set. try radical form + a._is_radical = True + if g.check_implicit(n, h): + a._implicit_hydrogens = h + radicalized.append(n) + # radical state also has errors. 
+ elif ignore: + a._is_radical = False # reset radical state + implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + elif ignore: # just ignore it + implicit_mismatch[n] = h + if data.get('log') is None: + data['log'] = [] + data['log'].append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') + else: + raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - g.__setstate__({'atoms': atoms, 'bonds': bonds, 'meta': data['meta'], 'plane': plane, 'parsed_mapping': pm, - 'charges': charges, 'radicals': radicals, 'name': data['title'], 'conformers': conformers, - 'atoms_stereo': {}, 'allenes_stereo': {}, 'cis_trans_stereo': {}, 'hydrogens': {}}) - if not skip_calc_implicit: - for n in atoms: - g._calc_implicit(n) + if ignore_carbon_radicals: + for n in radicalized: + if (a := atoms[n]) == C: + a._is_radical = False + a._implicit_hydrogens += 1 + if data.get('log') is None: + data['log'] = [] + data['log'].append(f'carbon radical {n} replaced with implicit hydrogen') + elif radicalized: + g.meta['chython_radicalized_atoms'] = radicalized + if data.get('log') and 'chython_parsing_log' not in g.meta: + g.meta['chython_parsing_log'] = data['log'] + if implicit_mismatch: + g.meta['chython_implicit_mismatch'] = implicit_mismatch return g def create_reaction(data, *, ignore=True, skip_calc_implicit=False, ignore_bad_isotopes=False, - _r_cls=ReactionContainer, _m_cls=MoleculeContainer): + keep_implicit=False, keep_radicals=True, ignore_aromatic_radicals=True, + ignore_carbon_radicals=False, _r_cls=ReactionContainer, _m_cls=MoleculeContainer): rc, pr, rg = [], [], [] for ms, pms, gr in ((rc, data['reactants'], 'reactant'), (pr, data['products'], 'products'), @@ -85,21 +182,26 @@ def create_reaction(data, *, 
ignore=True, skip_calc_implicit=False, ignore_bad_i for n, m in enumerate(pms): try: ms.append(create_molecule(m, skip_calc_implicit=skip_calc_implicit, - ignore_bad_isotopes=ignore_bad_isotopes, _cls=_m_cls)) + ignore_bad_isotopes=ignore_bad_isotopes, keep_implicit=keep_implicit, + keep_radicals=keep_radicals, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + ignore_carbon_radicals=ignore_carbon_radicals, _cls=_m_cls)) except ValueError as e: if not ignore: raise + if data.get('log') is None: + data['log'] = [] data['log'].append(f'ignored {gr} molecule {n} with {e}') tdl.append(n) if tdl: # ad-hoc for later postprocessing for n in reversed(tdl): del pms[n] - if data['log']: # store log to the meta - if data['meta'] is None: + if data.get('log'): # store log to the meta + if data.get('meta') is None: data['meta'] = {} data['meta']['chython_parsing_log'] = data['log'] - return _r_cls(rc, pr, rg, meta=data['meta'], name=data['title']) + return _r_cls(rc, pr, rg, meta=data.get('meta') or None, name=data.get('title')) __all__ = ['create_molecule'] diff --git a/chython/files/_mapping.py b/chython/files/_mapping.py index e8d5915c..c142676e 100644 --- a/chython/files/_mapping.py +++ b/chython/files/_mapping.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -24,16 +24,18 @@ def postprocess_parsed_molecule(data, *, remap=False, ignore=True): if remap: remapped = list(range(1, len(data['atoms']) + 1)) else: - length = count(max(x['mapping'] for x in data['atoms']) + 1) + length = count(max(x.get('parsed_mapping') or 0 for x in data['atoms']) + 1) remapped, used = [], set() for n, atom in enumerate(data['atoms']): - m = atom['mapping'] + m = atom.get('parsed_mapping') if not m: remapped.append(next(length)) elif m in used: if not ignore: raise MappingError('mapping in molecules should be unique') remapped.append(next(length)) + if data.get('log') is None: + data['log'] = [] data['log'].append(f'mapping in molecule changed from {m} to {remapped[n]}') else: remapped.append(m) @@ -47,7 +49,7 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): for molecule in data[i]: used = set() for atom in molecule['atoms']: - m = atom['mapping'] + m = atom.get('parsed_mapping') if m: if m in used: if not ignore: @@ -72,6 +74,8 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): raise MappingError('mapping in reagents or products or reactants should be unique') # force remap non unique atoms in molecules. 
_remap.append(next(length)) + if data.get('log') is None: + data['log'] = [] data['log'].append(f'mapping in {i} changed from {m} to {_remap[-1]}') else: _remap.append(m) @@ -83,6 +87,8 @@ def postprocess_parsed_reaction(data, *, remap=False, ignore=True): e = f'reagents has map intersection with reactants or products: {tmp}' if not ignore: raise MappingError(e) + if data.get('log') is None: + data['log'] = [] data['log'].append(e) maps['reagents'] = [x if x not in tmp else next(length) for x in maps['reagents']] diff --git a/chython/files/_mdl/stereo.py b/chython/files/_mdl/stereo.py deleted file mode 100644 index 67dd52aa..00000000 --- a/chython/files/_mdl/stereo.py +++ /dev/null @@ -1,95 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2023 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from ...exceptions import NotChiral, IsChiral, ValenceError - - -def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, calc_cis_trans=False, - keep_implicit=False): - mapping = data['mapping'] - hydrogens = molecule._hydrogens - hyb = molecule.hybridization - - implicit_mismatch = {} - if 'chython_parsing_log' in molecule.meta: - log = molecule.meta['chython_parsing_log'] - else: - log = [] - - for n, h in data['hydrogens'].items(): - n = mapping[n] - if keep_implicit: # override any calculated hydrogens count. 
- hydrogens[n] = h - if (hc := hydrogens[n]) is None: # aromatic rings or valence errors - if hyb(n) == 4: # this is aromatic rings. just store given H count. - hydrogens[n] = h - elif hc != h: - if hyb(n) == 4: - if ignore: - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif molecule._check_implicit(n, h): # set another possible implicit state. probably Al, P - hydrogens[n] = h - elif ignore: # just ignore it - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - - if implicit_mismatch: - molecule.meta['chython_implicit_mismatch'] = implicit_mismatch - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log - if ignore_stereo: - return - - if calc_cis_trans: - molecule.calculate_cis_trans_from_2d() - - stereo = [(mapping[n], mapping[m], s) for n, m, s in data['stereo']] - while stereo: - fail_stereo = [] - old_stereo = len(stereo) - for n, m, s in stereo: - try: - molecule.add_wedge(n, m, s, clean_cache=False) - except NotChiral: - fail_stereo.append((n, m, s)) - except IsChiral: - pass - except ValenceError: - log.append('structure has errors, stereo data skipped') - molecule.flush_cache() - break - else: - stereo = fail_stereo - if len(stereo) == old_stereo: - break - molecule.flush_stereo_cache() - if calc_cis_trans: - molecule.calculate_cis_trans_from_2d(clean_cache=False) - continue - break - - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log - - -__all__ = ['postprocess_molecule'] diff --git a/chython/files/daylight/parser.py b/chython/files/daylight/parser.py index 3cab6272..f45d020c 100644 --- a/chython/files/daylight/parser.py +++ 
b/chython/files/daylight/parser.py @@ -37,6 +37,7 @@ def parser(tokens, strong_cycle): last_num = 0 stack = [] cycles = {} + stereo_atoms = {} stereo_bonds = defaultdict(dict) previous = None @@ -135,6 +136,8 @@ def parser(tokens, strong_cycle): # else bt == 4 - skip dot previous = None + if token.get('stereo') is not None: + stereo_atoms[atom_num] = token.pop('stereo') atoms.append(token) atoms_types.append(token_type) last_num = atom_num @@ -147,8 +150,8 @@ def parser(tokens, strong_cycle): elif previous: raise IncorrectSmiles('bond on the end') - return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_bonds': stereo_bonds, 'log': log, - 'title': None, 'meta': None} + return {'atoms': atoms, 'bonds': bonds, 'order': order, 'stereo_atoms': stereo_atoms, + 'stereo_bonds': stereo_bonds, 'log': log} __all__ = ['parser'] diff --git a/chython/files/daylight/smarts.py b/chython/files/daylight/smarts.py index 2885b8a2..40c3cd58 100644 --- a/chython/files/daylight/smarts.py +++ b/chython/files/daylight/smarts.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022-2024 Ramil Nugmanov +# Copyright 2022-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,17 +16,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# +from functools import partial from itertools import count from re import compile, findall, search from .parser import parser from .tokenize import smarts_tokenize -from ...containers import QueryContainer -from ...periodictable import QueryElement +from ...containers import QueryContainer, QueryBond +from ...periodictable import ListElement, QueryElement cx_radicals = compile(r'\^[1-7]:[0-9]+(?:,[0-9]+)*') -cx_hh = compile(r'atomProp(:[0-9]+\.(?:hyb|het|msk)\.[0-9]+)+') -hybridization = {'4': 4, '3': 1, '2': 2, '1': 3} # AD-HOC for masked atoms in SMARTS # not thread safe @@ -37,28 +36,23 @@ def smarts(data: str): """ Parse SMARTS string. - * stereo ignored. * only D, a, h, r and !R atom primitives supported. - * bond order list and not bond supported. + * bond order list (max 2) and not bond supported. * [not]ring bond supported only in combination with explicit bonds, not bonds and bonds orders lists. * mapping, charge and isotopes supported. * list of elements supported. * A - treats as any element. primitive (aliphatic) ignored. - * M - treats as any metal.. + * M - treats as any metal * <&> logic operator unsupported. * <;> logic operator is mandatory except (however preferable) for charge, isotope, stereo marks. * CXSMARTS radicals supported. - * hybridization and heteroatoms count in CXSMARTS atomProp notation coded as and keys. * masked atom - `chython.Reactor` specific mark for masking reactant atoms from deletion. - Coded in CXSMARTS atomProp as key with any value. For example:: - [C;r5,r6;a]-;!@[C;h1,h2] |^1:1,atomProp:1.hyb.24:1.het.0| - aromatic C member of 5 or 6 atoms ring + [C;r5,r6;a]-;!@[C;h1,h2;z2,z4] |^1:1| - aromatic C member of 5 or 6 atoms ring connected with non-ring single bond to aromatic or SP2 radical C with 1 or 2 hydrogens. - Alternative hybridization, heteroatoms and masks coding: - * primitive - heteroatoms (e.g. 
x2 - two heteroatoms) * primitive - hybridization (N = 1 - sp3, 2 - sp2, 3 - sp, 4 - aromatic) * primitive - masked atom @@ -71,51 +65,41 @@ def smarts(data: str): raise TypeError('Must be a SMARTS string') smr, *cx = data.split() - hyb = {} - het = {} - msk = [] - if cx and cx[0].startswith('|') and cx[0].endswith('|'): - radicals = [int(x) for x in findall(cx_radicals, cx[0]) for x in x[3:].split(',')] - - if hh := search(cx_hh, cx[0]): - for x in hh.group().split(':')[1:]: - i, h, v = x.split('.') - i = int(i) - if h == 'hyb': - hyb[i] = [hybridization[x] for x in v] - elif h == 'het': - het[i] = [int(y) for y in v] - else: - msk.append(i) - else: - radicals = [] + parsed = parser(smarts_tokenize(smr), False) - data = parser(smarts_tokenize(smr), False) + if cx and cx[0].startswith('|') and cx[0].endswith('|'): + for x in findall(cx_radicals, cx[0]): + for i in x[3:].split(','): + parsed['atoms'][int(i)]['is_radical'] = True - for x in radicals: - data['atoms'][x]['is_radical'] = True - for i, v in hyb.items(): - data['atoms'][i]['hybridization'] = v - for i, v in het.items(): - data['atoms'][i]['heteroatoms'] = v - for i in msk: - data['atoms'][i]['masked'] = True + for i, s in parsed['stereo_atoms'].items(): + parsed['atoms'][i]['stereo'] = s + stereo_bonds = parsed['stereo_bonds'] - g = QueryContainer() + g = QueryContainer(data) mapping = {} - free = count(max(a['mapping'] for a in data['atoms']) + 1) - for i, a in enumerate(data['atoms']): - mapping[i] = n = a.pop('mapping') or next(global_free_masked if a['masked'] else free) + free = count(max(a.get('parsed_mapping', 0) for a in parsed['atoms']) + 1) + for i, a in enumerate(parsed['atoms']): + mapping[i] = n = a.pop('parsed_mapping', 0) or next(global_free_masked if a.get('masked') else free) e = a.pop('element') - if it := a.pop('isotope'): - if isinstance(e, int): - e = QueryElement.from_atomic_number(e)(it) - else: - e = QueryElement.from_symbol(e)(it) - g.add_atom(e, n, **a) - - for n, m, b in 
data['bonds']: + if isinstance(e, int): + e = QueryElement.from_atomic_number(e) + elif isinstance(e, str): + e = QueryElement.from_symbol(e) + else: + e = partial(ListElement, e) + g.add_atom(e(**a), n) + + for n, m, b in parsed['bonds']: + if n in stereo_bonds and m in stereo_bonds: + if m not in stereo_bonds[n]: # only simple cis-trans supported, not cumulenes + _, s1 = stereo_bonds[n].popitem() + _, s2 = stereo_bonds[m].popitem() + if isinstance(b, int): + b = QueryBond(b, stereo=s1 == s2) + else: + b.stereo = s1 == s2 g.add_bond(mapping[n], mapping[m], b) return g diff --git a/chython/files/daylight/smiles.py b/chython/files/daylight/smiles.py index 2271a052..293597ac 100644 --- a/chython/files/daylight/smiles.py +++ b/chython/files/daylight/smiles.py @@ -78,7 +78,7 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: contract = None if '>' in smi: - record = {'reactants': [], 'reagents': [], 'products': [], 'log': log, 'meta': None, 'title': None} + record = {'reactants': [], 'reagents': [], 'products': [], 'log': log} try: reactants, reagents, products = smi.split('>') except ValueError as e: @@ -143,11 +143,12 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: atom_map[x]['is_radical'] = True postprocess_parsed_reaction(record, remap=remap, ignore=ignore) - rxn = create_reaction(record, ignore_bad_isotopes=ignore_bad_isotopes, _r_cls=_r_cls, _m_cls=_m_cls) + rxn = create_reaction(record, ignore_bad_isotopes=ignore_bad_isotopes, keep_radicals=False, + ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + _r_cls=_r_cls, _m_cls=_m_cls) for mol, tmp in zip(rxn.molecules(), chain(record['reactants'], record['reagents'], record['products'])): - postprocess_molecule(mol, tmp, ignore=ignore, ignore_stereo=ignore_stereo, - ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, - 
ignore_aromatic_radicals=ignore_aromatic_radicals) + postprocess_molecule(mol, tmp, ignore_stereo=ignore_stereo) return rxn else: record = parser(smiles_tokenize(smi), not ignore) @@ -156,122 +157,35 @@ def smiles(data, /, *, ignore: bool = True, remap: bool = False, ignore_stereo: record['log'].extend(log) postprocess_parsed_molecule(record, remap=remap, ignore=ignore) - mol = create_molecule(record, ignore_bad_isotopes=ignore_bad_isotopes, _cls=_m_cls) - postprocess_molecule(mol, record, ignore=ignore, ignore_stereo=ignore_stereo, - ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, - ignore_aromatic_radicals=ignore_aromatic_radicals) + mol = create_molecule(record, ignore_bad_isotopes=ignore_bad_isotopes, keep_radicals=False, + ignore_carbon_radicals=ignore_carbon_radicals, keep_implicit=keep_implicit, + ignore_aromatic_radicals=ignore_aromatic_radicals, ignore=ignore, + _cls=_m_cls) + postprocess_molecule(mol, record, ignore_stereo=ignore_stereo) return mol -def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, ignore_carbon_radicals=False, - keep_implicit=False, ignore_aromatic_radicals=True): +def postprocess_molecule(molecule, data, *, ignore_stereo=False): mapping = data['mapping'] - atoms = molecule._atoms - bonds = molecule._bonds - charges = molecule._charges - hydrogens = molecule._hydrogens - radicals = molecule._radicals - hyb = molecule.hybridization - radicalized = [] - - implicit_mismatch = {} - if 'chython_parsing_log' in molecule.meta: - log = molecule.meta['chython_parsing_log'] - else: - log = [] - - for n, a in enumerate(data['atoms']): - h = a['hydrogen'] - if h is None: # simple atom token - continue - # bracket token should always contain implicit hydrogens count. - n = mapping[n] - if keep_implicit: # override any calculated hydrogens count. - hydrogens[n] = h - elif (hc := hydrogens[n]) is None: # atom has invalid valence or aromatic ring. - if hyb(n) == 4: # this is aromatic rings. 
just store given H count. - hydrogens[n] = h - # rare H0 case - if (not ignore_aromatic_radicals and not h and not charges[n] and not radicals[n] and - atoms[n].atomic_number in (5, 6, 7, 15) and sum(b.order != 8 for b in bonds[n].values()) == 2): - # c[c]c - aromatic B,C,N,P radical - radicals[n] = True - radicalized.append(n) - elif not radicals[n]: # CXSMILES radical not set. - # SMILES doesn't code radicals. so, let's try to guess. - radicals[n] = True - if molecule._check_implicit(n, h): # radical form is valid - radicalized.append(n) - hydrogens[n] = h - elif ignore: # radical state also has errors. - radicals[n] = False # reset radical state - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif hc != h: # H count mismatch. - if hyb(n) == 4: - if not h and not charges[n] and not radicals[n] and atoms[n].atomic_number in (5, 6, 7, 15) and \ - sum(b.order != 8 for b in bonds[n].values()) == 2: - # c[c]c - aromatic B,C,N,P radical - hydrogens[n] = 0 - radicals[n] = True - radicalized.append(n) - elif ignore: - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif molecule._check_implicit(n, h): # set another possible implicit state. probably Al, P - hydrogens[n] = h - elif not radicals[n]: # CXSMILES radical is not set. try radical form - radicals[n] = True - if molecule._check_implicit(n, h): - hydrogens[n] = h - radicalized.append(n) - # radical state also has errors. 
- elif ignore: - radicals[n] = False # reset radical state - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - elif ignore: # just ignore it - implicit_mismatch[n] = h - log.append(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - else: - raise ValueError(f'implicit hydrogen count ({h}) mismatch with calculated on atom {n}') - - if ignore_carbon_radicals: - for n in radicalized: - if atoms[n].atomic_number == 6: - radicals[n] = False - hydrogens[n] += 1 - log.append(f'carbon radical {n} replaced with implicit hydrogen') - - if implicit_mismatch: - molecule.meta['chython_implicit_mismatch'] = implicit_mismatch - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log if ignore_stereo: return - - stereo_atoms = [(n, s) for n, a in enumerate(data['atoms']) if (s := a['stereo']) is not None] - if not stereo_atoms and not data['stereo_bonds']: + elif not data['stereo_atoms'] and not data['stereo_bonds']: return - st = molecule._stereo_tetrahedrons - sa = molecule._stereo_allenes + atoms = molecule._atoms + st = molecule.stereogenic_tetrahedrons + sa = molecule.stereogenic_allenes sat = molecule._stereo_allenes_terminals - ctt = molecule._stereo_cis_trans_terminals + ctc = molecule._stereo_cis_trans_counterpart order = {mapping[n]: [mapping[m] for m in ms] for n, ms in data['order'].items()} + log = [] stereo = [] - for i, s in stereo_atoms: + for i, s in data['stereo_atoms'].items(): n = mapping[i] - if not i and hydrogens[n]: # first atom in smiles has reversed chiral mark + if not i and atoms[n].implicit_hydrogens: # first atom in smiles has reversed chiral mark s = not s if n in st: @@ -282,6 +196,8 @@ def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, ig n1 = next(x for x in order[t1] if x in env) n2 = next(x for x in 
order[t2] if x in env) stereo.append((molecule.add_atom_stereo, n, (n1, n2), s)) + else: + log.append(f'non chiral atom {n} has stereo label in smiles') stereo_bonds = {mapping[n]: {mapping[m]: s for m, s in ms.items()} for n, ms in data['stereo_bonds'].items()} @@ -289,9 +205,8 @@ def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, ig for n, ns in stereo_bonds.items(): if n in seen: continue - if n in ctt: - nm = ctt[n] - m = nm[1] if nm[0] == n else nm[0] + if n in ctc: + m = ctc[n] if m in stereo_bonds: seen.add(m) n2, s2 = stereo_bonds[m].popitem() @@ -320,8 +235,11 @@ def postprocess_molecule(molecule, data, *, ignore=True, ignore_stereo=False, ig continue break - if log and 'chython_parsing_log' not in molecule.meta: - molecule.meta['chython_parsing_log'] = log + if log: + if 'chython_parsing_log' not in molecule.meta: + molecule.meta['chython_parsing_log'] = log + else: + molecule.meta['chython_parsing_log'].extend(log) __all__ = ['smiles'] diff --git a/chython/files/daylight/test/__init__.py b/chython/files/daylight/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/files/daylight/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# diff --git a/tests/files/daylight/test_daylight_smarts.py b/chython/files/daylight/test/test_daylight_smarts.py similarity index 100% rename from tests/files/daylight/test_daylight_smarts.py rename to chython/files/daylight/test/test_daylight_smarts.py diff --git a/tests/files/daylight/test_daylight_smiles.py b/chython/files/daylight/test/test_daylight_smiles.py similarity index 100% rename from tests/files/daylight/test_daylight_smiles.py rename to chython/files/daylight/test/test_daylight_smiles.py diff --git a/tests/files/daylight/test_parser.py b/chython/files/daylight/test/test_parser.py similarity index 100% rename from tests/files/daylight/test_parser.py rename to chython/files/daylight/test/test_parser.py diff --git a/chython/files/daylight/test/test_tokenize.py b/chython/files/daylight/test/test_tokenize.py new file mode 100644 index 00000000..3532813a --- /dev/null +++ b/chython/files/daylight/test/test_tokenize.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# Copyright 2025 Tagir Akhmetshin +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from chython.files.daylight.tokenize import smiles_tokenize, smarts_tokenize +from chython.containers import QueryBond +from chython.exceptions import IncorrectSmiles +from pytest import raises + + +def test_smiles_tokenize(): + assert smiles_tokenize('C') == [(0, {'element': 'C'})] + assert smiles_tokenize('CC') == [(0, {'element': 'C'}), (0, {'element': 'C'})] + assert smiles_tokenize('C=O') == [(0, {'element': 'C'}), (1, 2), (0, {'element': 'O'})] + assert smiles_tokenize('C(O)N') == [(0, {'element': 'C'}), (2, None), (0, {'element': 'O'}), + (3, None), (0, {'element': 'N'})] + assert smiles_tokenize('C2CC2') == [(0, {'element': 'C'}), (6, 2), (0, {'element': 'C'}), + (0, {'element': 'C'}), (6, 2)] + + +def test_smiles_tokenize_atom(): + assert smiles_tokenize('[NH4+]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': 1, + 'implicit_hydrogens': 4, 'stereo': None})] + assert smiles_tokenize('[14N]') == [(0, {'element': 'N', 'isotope': 14, 'parsed_mapping': None, 'charge': 0, + 'implicit_hydrogens': 0, 'stereo': None})] + assert smiles_tokenize('[N@H]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': 0, + 'implicit_hydrogens': 1, 'stereo': True})] + assert smiles_tokenize('[N@@H--]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': -2, + 'implicit_hydrogens': 1, 'stereo': False})] + assert smiles_tokenize('[N@+3]') == [(0, {'element': 'N', 'isotope': None, 'parsed_mapping': None, 'charge': 3, + 'implicit_hydrogens': 0, 'stereo': True})] + assert smiles_tokenize('[CH2:2]') == [(0, {'element': 'C', 'isotope': None, 'parsed_mapping': 2, 'charge': 0, + 'implicit_hydrogens': 2, 'stereo': None})] + with raises(IncorrectSmiles): + smiles_tokenize('[@N]') + + +def test_smarts_tokenize_atom(): + # Test basic SMARTS tokenization + assert smarts_tokenize('[C]') == [(0, {'element': 'C'})] + assert smarts_tokenize('[C,N]') == [(0, {'element': ['C', 'N']})] + assert smarts_tokenize('[C+]') == 
[(0, {'charge': 1, 'element': 'C'})] + assert smarts_tokenize('[#1]') == [(0, {'element': 1})] + assert smarts_tokenize('[C;h1;@]') == [(0, {'element': 'C', 'implicit_hydrogens': [1], 'stereo': True})] + assert smarts_tokenize('[O;z1,z2;x1]') == [(0, {'element': 'O', 'heteroatoms': [1], 'hybridization': [1, 2]})] + assert smarts_tokenize('[Se;a;D1,D2;r4,r7:3]') == [(0, {'parsed_mapping': 3, 'element': 'Se', 'hybridization': 4, 'neighbors': [1, 2], 'ring_sizes': [4, 7]})] + assert smarts_tokenize('[Cl;M]') == [(0, {'element': 'Cl', 'masked': True})] + assert smarts_tokenize('[A:1]') == [(0, {'parsed_mapping': 1, 'element': 'A'})] + assert smarts_tokenize('[M]') == [(0, {'element': 'M'})] + + +def test_smarts_tokenize_bonds(): + assert smarts_tokenize('[C][C]') == [(0, {'element': 'C'}), (0, {'element': 'C'})] + assert smarts_tokenize('[C]-[C]') == [(0, {'element': 'C'}), (1, 1), (0, {'element': 'C'})] + assert smarts_tokenize('[C]~[C]') == [(0, {'element': 'C'}), (1, 8), (0, {'element': 'C'})] + assert smarts_tokenize('[C]!:[C]') == [(0, {'element': 'C'}), (10, [1, 2, 3]), (0, {'element': 'C'})] + assert smarts_tokenize('[C]-,=[C]') == [(0, {'element': 'C'}), (10, [1, 2]), (0, {'element': 'C'})] + assert smarts_tokenize('[C]-;@[C]') == [(0, {'element': 'C'}), (12, QueryBond(1, True)), (0, {'element': 'C'})] + assert smarts_tokenize('[C]!-;!@[C]') == [(0, {'element': 'C'}), (12, QueryBond((2, 3, 4), False)), + (0, {'element': 'C'})] + assert smarts_tokenize('[C]-,=;!@[C]') == [(0, {'element': 'C'}), (12, QueryBond((1, 2), False)), + (0, {'element': 'C'})] diff --git a/chython/files/daylight/tokenize.py b/chython/files/daylight/tokenize.py index 645d87e9..fe626b9f 100644 --- a/chython/files/daylight/tokenize.py +++ b/chython/files/daylight/tokenize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022, 2023 Ramil Nugmanov +# Copyright 2022-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -16,11 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from re import compile, fullmatch, match, search -from .._mdl import common_isotopes +from re import compile, match, search from ...containers.bonds import QueryBond from ...exceptions import IncorrectSmiles, IncorrectSmarts -from ...periodictable.element import ListElement # -,= OR bonds supported @@ -49,7 +47,6 @@ # 12: in ring bond -atomic_numbers = dict(enumerate(common_isotopes, 1)) iso_re = compile(r'^[0-9]+') chg_re = compile(r'[+-][1-4+-]?') mpp_re = compile(r':[1-9][0-9]*$') @@ -243,7 +240,7 @@ def _tokenize(smiles): def _atom_parse(token): # [isotope]Element[element][@[@]][H[n]][+-charge][:mapping] - _match = fullmatch(atom_re, token) + _match = atom_re.fullmatch(token) if _match is None: raise IncorrectSmiles(f'atom token invalid {token}') isotope, element, stereo, hydrogen, charge, mapping = _match.groups() @@ -275,34 +272,32 @@ def _atom_parse(token): mapping = int(mapping[1:]) except ValueError: raise IncorrectSmiles('invalid mapping token') - else: - mapping = 0 if element in ('c', 'n', 'o', 'p', 's', 'as', 'se', 'b', 'te'): _type = 8 element = element.capitalize() else: _type = 0 - return _type, {'element': element, 'isotope': isotope, 'mapping': mapping, 'charge': charge, 'is_radical': False, - 'x': 0., 'y': 0., 'z': 0., 'hydrogen': hydrogen, 'stereo': stereo} + return _type, {'element': element, 'isotope': isotope, 'parsed_mapping': mapping, 'charge': charge, + 'implicit_hydrogens': hydrogen, 'stereo': stereo} def _query_parse(token): + out = {} if isotope := match(iso_re, token): token = token[isotope.end():] # remove isotope substring - isotope = int(isotope.group()) + out['isotope'] = int(isotope.group()) if charge := search(chg_re, token): token = token[:charge.start()] + token[charge.end():] # remove charge substring - charge = 
charge_dict[charge.group()] - else: - charge = 0 + out['charge'] = charge_dict[charge.group()] + if mapping := search(mpp_re, token): token = token[:mapping.start()] - mapping = int(mapping.group()[1:]) - else: - mapping = 0 + out['parsed_mapping'] = int(mapping.group()[1:]) + if stereo := search(str_re, token): # drop stereo mark. unsupported token = token[:stereo.start()] + token[stereo.end():] + out['stereo'] = stereo.group() == '@' # supported only <;> and <,> logic. <&> and silent <&> not supported! primitives = token.split(';') @@ -310,35 +305,21 @@ def _query_parse(token): element = [int(x[1:]) if x.startswith('#') else x for x in element.split(',')] if len(element) == 1: element = element[0] - else: # only atoms supported - tmp = [] - for x in element: - if isinstance(x, int): - try: - tmp.append(atomic_numbers[x]) - except KeyError as e: - raise IncorrectSmiles('Invalid atomic number') from e - elif x in common_isotopes: - tmp.append(x) - else: - raise IncorrectSmarts('Invalid element symbol') - element = ListElement(tmp) else: raise IncorrectSmarts('Empty element') + out['element'] = element - hybridization = rings_sizes = neighbors = hydrogens = heteroatoms = None - masked = False for p in primitives[1:]: # parse hydrogens (h), neighbors (D), rings_sizes (r or !R), hybridization == 4 (a) if not p: continue elif p == 'a': # aromatic atom - hybridization = 4 + out['hybridization'] = 4 elif p == 'A': # ignore aliphatic mark. Ad-Hoc for Marwin. 
continue elif p == '!R': - rings_sizes = 0 + out['ring_sizes'] = 0 elif p == 'M': - masked = True + out['masked'] = True else: p = p.split(',') if len(p) != 1 and len({x[0] for x in p}) > 1: @@ -352,19 +333,16 @@ def _query_parse(token): raise IncorrectSmarts('Unsupported SMARTS primitive') if t == 'D': - neighbors = p + out['neighbors'] = p elif t == 'h': - hydrogens = p + out['implicit_hydrogens'] = p elif t == 'r': # r - rings_sizes = p + out['ring_sizes'] = p elif t == 'x': - heteroatoms = p + out['heteroatoms'] = p else: # z - hybridization = p - - return 0, {'element': element, 'isotope': isotope, 'mapping': mapping, 'charge': charge, 'is_radical': False, - 'heteroatoms': heteroatoms, 'hydrogens': hydrogens, 'neighbors': neighbors, - 'rings_sizes': rings_sizes, 'hybridization': hybridization, 'masked': masked} + out['hybridization'] = p + return 0, out def smiles_tokenize(smi): @@ -372,8 +350,7 @@ def smiles_tokenize(smi): out = [] for token_type, token in tokens: if token_type in (0, 8): # simple atom - out.append((token_type, {'element': token, 'isotope': None, 'mapping': 0, 'charge': 0, 'is_radical': False, - 'x': 0., 'y': 0., 'z': 0., 'hydrogen': None, 'stereo': None})) + out.append((token_type, {'element': token})) elif token_type == 5: out.append(_atom_parse(token)) elif token_type == 10: @@ -388,9 +365,7 @@ def smarts_tokenize(smi): out = [] for token_type, token in tokens: if token_type in (0, 8): # simple atom - out.append((0, {'element': token, 'isotope': None, 'mapping': 0, 'charge': 0, 'is_radical': False, - 'heteroatoms': None, 'hydrogens': None, 'neighbors': None, - 'rings_sizes': None, 'hybridization': None, 'masked': False})) + out.append((0, {'element': token})) elif token_type == 5: out.append(_query_parse(token)) else: diff --git a/chython/files/libinchi/wrapper.py b/chython/files/libinchi/wrapper.py index 0fb7daf3..8d583fb6 100644 --- a/chython/files/libinchi/wrapper.py +++ b/chython/files/libinchi/wrapper.py @@ -1,6 +1,6 @@ # -*- coding: 
utf-8 -*- # -# Copyright 2018-2023 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -21,11 +21,10 @@ from sysconfig import get_platform from warnings import warn from .._convert import create_molecule -from .._mdl import common_isotopes from ...containers import MoleculeContainer from ...containers.bonds import Bond from ...exceptions import ValenceError, IsChiral, NotChiral -from ...periodictable import H +from ...periodictable import H as _H try: @@ -34,6 +33,9 @@ from importlib_resources import files, as_file +H = 1 + + def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> MoleculeContainer: """ INCHI string parser @@ -47,15 +49,23 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo raise ValueError('invalid INCHI') atoms, bonds = [], [] + protium = {} + deuterium = {} + tritium = {} seen = set() for n in range(structure.num_atoms): seen.add(n) atom = structure.atom[n] - atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'mapping': 0, 'x': atom.x, 'y': atom.y, + atoms.append({'element': atom.atomic_symbol, 'charge': atom.charge, 'x': atom.x, 'y': atom.y, 'z': atom.z, 'isotope': atom.isotope, 'is_radical': atom.is_radical, - 'hydrogens': atom.implicit_hydrogens, 'p': atom.implicit_protium, 'd': atom.implicit_deuterium, - 't': atom.implicit_tritium}) + 'implicit_hydrogens': atom.implicit_hydrogens, 'delta_isotope': atom.delta_isotope}) + if atom.implicit_protium: + protium[n] = atom.implicit_protium + if atom.implicit_deuterium: + deuterium[n] = atom.implicit_deuterium + if atom.implicit_tritium: + tritium[n] = atom.implicit_tritium for k in range(atom.num_bonds): m = atom.neighbor[k] @@ -83,8 +93,9 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo lib.FreeStructFromINCHI(byref(structure)) - tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': 
stereo_atoms, 'stereo_allenes': stereo_allenes, 'log': [], - 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), 'title': None, 'meta': None} + tmp = {'atoms': atoms, 'bonds': bonds, 'stereo_atoms': stereo_atoms, 'stereo_allenes': stereo_allenes, + 'stereo_cumulenes': stereo_cumulenes, 'mapping': list(range(1, len(atoms) + 1)), + 'protium': protium, 'deuterium': deuterium, 'tritium': tritium} mol = create_molecule(tmp, skip_calc_implicit=True, _cls=_cls) postprocess_molecule(mol, tmp, ignore_stereo=ignore_stereo) return mol @@ -93,52 +104,37 @@ def inchi(data, /, *, ignore_stereo: bool = False, _cls=MoleculeContainer) -> Mo def postprocess_molecule(molecule, data, *, ignore_stereo=False): atoms = molecule._atoms bonds = molecule._bonds - charges = molecule._charges - radicals = molecule._radicals - hydrogens = molecule._hydrogens - plane = molecule._plane # set hydrogen atoms. INCHI designed for hydrogens handling. hope correctly. free = count(len(atoms) + 1) - for n, atom in enumerate(data['atoms'], 1): - if atom['element'] != 'H': - hydrogens[n] = atom['hydrogens'] - # in chython hydrogens never have implicit H. - elif atom['hydrogens']: # >[xH]-H case - m = next(free) - charges[m] = 0 - radicals[m] = False - plane[m] = (0., 0.) - hydrogens[n] = 0 - hydrogens[m] = 0 - atoms[m] = a = H() - a._attach_graph(molecule, m) + to_add = [] + for n, atom in atoms.items(): + # in chython hydrogens never have implicit H. 
convert to explicit + if atom == H and atom.implicit_hydrogens: + for _ in range(atom.implicit_hydrogens): + to_add.append((n, next(free), _H(implicit_hydrogens=0))) + atom._implicit_hydrogens = 0 + + for n, p in data['protium'].items(): + to_add.append((n + 1, next(free), _H(isotope=1, implicit_hydrogens=0))) + for n, p in data['deuterium'].items(): + to_add.append((n + 1, next(free), _H(isotope=2, implicit_hydrogens=0))) + for n, p in data['tritium'].items(): + to_add.append((n + 1, next(free), _H(isotope=3, implicit_hydrogens=0))) + + if to_add: + for n, m, a in to_add: + atoms[m] = a bonds[n][m] = b = Bond(1) bonds[m] = {n: b} - b._attach_graph(molecule, n, m) - else: # H+, H* or >H-[xH] cases - hydrogens[n] = 0 - # convert isotopic implicit hydrogens to explicit - for i, k in enumerate(('p', 'd', 't'), 1): - if atom[k]: - for _ in range(atom[k]): - m = next(free) - charges[m] = 0 - radicals[m] = False - plane[m] = (0., 0.) - hydrogens[m] = 0 - atoms[m] = a = H(i) - a._attach_graph(molecule, m) - bonds[n][m] = b = Bond(1) - bonds[m] = {n: b} - b._attach_graph(molecule, n, m) + molecule.calc_labels() # reset labels if ignore_stereo or not data['stereo_atoms'] and not data['stereo_cumulenes'] and not data['stereo_allenes']: return - st = molecule._stereo_tetrahedrons - sa = molecule._stereo_allenes - ctt = molecule._stereo_cis_trans_terminals + st = molecule.stereogenic_tetrahedrons + sa = molecule.stereogenic_allenes + ctc = molecule._stereo_cis_trans_counterpart stereo = [] for n, ngb, s in data['stereo_atoms']: @@ -151,7 +147,7 @@ def postprocess_molecule(molecule, data, *, ignore_stereo=False): stereo.append((molecule.add_atom_stereo, n, nn + 1, mn + 1, s)) for n, m, nn, nm, s in data['stereo_cumulenes']: n += 1 - if n in ctt: + if n in ctc: stereo.append((molecule.add_cis_trans_stereo, n, m + 1, nn + 1, nm + 1, s)) while stereo: @@ -200,12 +196,13 @@ def atomic_symbol(self): @property def isotope(self): - isotope = self.isotopic_mass - if not isotope: - 
isotope = None - elif isotope > 9000: # OVER NINE THOUSANDS! - isotope += common_isotopes[self.atomic_symbol] - 10000 - return isotope + if 0 < self.isotopic_mass < 9000: # OVER NINE THOUSANDS! + return self.isotopic_mass + + @property + def delta_isotope(self): + if self.isotopic_mass > 9000: + return self.isotopic_mass - 10_000 @property def is_radical(self): diff --git a/chython/files/_mdl/__init__.py b/chython/files/mdl/__init__.py similarity index 89% rename from chython/files/_mdl/__init__.py rename to chython/files/mdl/__init__.py index d941f381..2310481a 100644 --- a/chython/files/_mdl/__init__.py +++ b/chython/files/mdl/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2017-2023 Ramil Nugmanov +# Copyright 2017-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .mol import parse_mol_v2000, common_isotopes +from .mol import parse_mol_v2000 from .emol import parse_mol_v3000 from .rxn import parse_rxn_v2000 from .erxn import parse_rxn_v3000 diff --git a/chython/files/_mdl/emol.py b/chython/files/mdl/emol.py similarity index 93% rename from chython/files/_mdl/emol.py rename to chython/files/mdl/emol.py index 9e6b4437..e8390a9c 100644 --- a/chython/files/_mdl/emol.py +++ b/chython/files/mdl/emol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -21,7 +21,7 @@ def parse_mol_v3000(data, *, _header=True): if _header: - title = data[1].strip() or None + title = data[0].strip() or None data = data[4:] else: title = None @@ -36,7 +36,6 @@ def parse_mol_v3000(data, *, _header=True): atoms = [] bonds = [] stereo = [] - hydrogens = {} meta = {} atom_map = {} star_points = [] @@ -95,7 +94,7 @@ def parse_mol_v3000(data, *, _header=True): atom_map[n] = len(atoms) atoms.append({'element': a, 'isotope': i, 'charge': c, 'is_radical': r, - 'x': float(x), 'y': float(y), 'z': float(z), 'mapping': int(m)}) + 'x': float(x), 'y': float(y), 'z': float(z), 'parsed_mapping': int(m)}) for line in data[2 + atom_count: 2 + atom_count + bonds_count]: _, t, a1, a2, *kvs = split(line) @@ -150,13 +149,13 @@ def parse_mol_v3000(data, *, _header=True): drop = True for line in data[3 + atom_count + bonds_count:]: - if line.startswith('M V30 END CTAB'): + if line.startswith('END CTAB'): break elif drop: - if line.startswith('M V30 BEGIN SGROUP'): + if line.startswith('BEGIN SGROUP'): drop = False continue - elif line.startswith('M V30 END SGROUP'): + elif line.startswith('END SGROUP'): break _, _type, i, *kvs = split(line) @@ -172,14 +171,13 @@ def parse_mol_v3000(data, *, _header=True): d = v.strip('"') if a and f and d: if f == 'MRV_IMPLICIT_H': - hydrogens[a[0]] = int(d[6:]) + atoms[a[0]]['implicit_hydrogens'] = int(d[6:]) else: log.append(f'ignored SGROUP DAT {i}: {a}\t{f}\t{d}') elif _type.startswith('SRU'): raise ValueError('Polymers not supported') - return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': meta or None, 'log': log} + return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'meta': meta, 'log': log} def split(line): # todo optimize diff --git a/chython/files/_mdl/erxn.py b/chython/files/mdl/erxn.py similarity index 94% rename from chython/files/_mdl/erxn.py rename to 
chython/files/mdl/erxn.py index 25354f9b..d088cabe 100644 --- a/chython/files/_mdl/erxn.py +++ b/chython/files/mdl/erxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -29,7 +29,7 @@ def parse_rxn_v3000(data, *, ignore=True): if not reagents_count: raise EmptyReaction - title = data[2].strip() or None + title = data[1].strip() or None log = [] molecules = [] @@ -61,7 +61,7 @@ def parse_rxn_v3000(data, *, ignore=True): reagents_count -= 1 return {'reactants': molecules[:reactants_count], 'products': molecules[reactants_count:products_count], - 'reagents': molecules[products_count:], 'title': title, 'meta': None, 'log': log} + 'reagents': molecules[products_count:], 'title': title, 'log': log} __all__ = ['parse_rxn_v3000'] diff --git a/chython/files/_mdl/mol.py b/chython/files/mdl/mol.py similarity index 68% rename from chython/files/_mdl/mol.py rename to chython/files/mdl/mol.py index 3879b7ea..88b21373 100644 --- a/chython/files/_mdl/mol.py +++ b/chython/files/mdl/mol.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -19,19 +19,6 @@ from ...exceptions import EmptyMolecule, InvalidCharge, InvalidV2000 -common_isotopes = {'H': 1, 'He': 4, 'Li': 7, 'Be': 9, 'B': 11, 'C': 12, 'N': 14, 'O': 16, 'F': 19, 'Ne': 20, 'Na': 23, - 'Mg': 24, 'Al': 27, 'Si': 28, 'P': 31, 'S': 32, 'Cl': 35, 'Ar': 40, 'K': 39, 'Ca': 40, 'Sc': 45, - 'Ti': 48, 'V': 51, 'Cr': 52, 'Mn': 55, 'Fe': 56, 'Co': 59, 'Ni': 59, 'Cu': 64, 'Zn': 65, 'Ga': 70, - 'Ge': 73, 'As': 75, 'Se': 79, 'Br': 80, 'Kr': 84, 'Rb': 85, 'Sr': 88, 'Y': 89, 'Zr': 91, 'Nb': 93, - 'Mo': 96, 'Tc': 98, 'Ru': 101, 'Rh': 103, 'Pd': 106, 'Ag': 108, 'Cd': 112, 'In': 115, 'Sn': 119, - 'Sb': 122, 'Te': 128, 'I': 127, 'Xe': 131, 'Cs': 133, 'Ba': 137, 'La': 139, 'Ce': 140, 'Pr': 141, - 'Nd': 144, 'Pm': 145, 'Sm': 150, 'Eu': 152, 'Gd': 157, 'Tb': 159, 'Dy': 163, 'Ho': 165, 'Er': 167, - 'Tm': 169, 'Yb': 173, 'Lu': 175, 'Hf': 178, 'Ta': 181, 'W': 184, 'Re': 186, 'Os': 190, 'Ir': 192, - 'Pt': 195, 'Au': 197, 'Hg': 201, 'Tl': 204, 'Pb': 207, 'Bi': 209, 'Po': 209, 'At': 210, 'Rn': 222, - 'Fr': 223, 'Ra': 226, 'Ac': 227, 'Th': 232, 'Pa': 231, 'U': 238, 'Np': 237, 'Pu': 244, 'Am': 243, - 'Cm': 247, 'Bk': 247, 'Cf': 251, 'Es': 252, 'Fm': 257, 'Md': 258, 'No': 259, 'Lr': 260, 'Rf': 261, - 'Db': 270, 'Sg': 269, 'Bh': 270, 'Hs': 270, 'Mt': 278, 'Ds': 281, 'Rg': 281, 'Cn': 285, 'Nh': 278, - 'Fl': 289, 'Mc': 289, 'Lv': 293, 'Ts': 297, 'Og': 294} _ctf_data = {'R': 'is_radical', 'C': 'charge', 'I': 'isotope'} _charge_map = {' 0': 0, ' 1': 3, ' 2': 2, ' 3': 1, ' 4': 0, ' 5': -1, ' 6': -2, ' 7': -3} @@ -45,11 +32,10 @@ def parse_mol_v2000(data): raise EmptyMolecule log = [] - title = data[1].strip() or None + title = data[0].strip() or None atoms = [] bonds = [] stereo = [] - hydrogens = {} dat = {} for line in data[4: 4 + atoms_count]: @@ -59,6 +45,7 @@ def parse_mol_v2000(data): raise InvalidCharge element = line[31:34].strip() isotope = line[34:36] + delta_isotope = None if element in 'AL': raise 
ValueError('queries not supported') @@ -68,17 +55,15 @@ def parse_mol_v2000(data): raise ValueError('isotope on deuterium atom') isotope = 2 elif isotope != ' 0': - try: - isotope = common_isotopes[element] + int(isotope) - except KeyError: - raise ValueError('invalid element symbol') + delta_isotope = int(isotope) + isotope = None else: isotope = None mapping = line[60:63] - atoms.append({'element': element, 'charge': charge, 'isotope': isotope, 'is_radical': False, - 'mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), - 'z': float(line[20:30])}) + atoms.append({'element': element, 'charge': charge, 'isotope': isotope, + 'parsed_mapping': int(mapping) if mapping else 0, 'x': float(line[0:10]), 'y': float(line[10:20]), + 'z': float(line[20:30]), 'delta_isotope': delta_isotope}) for line in data[4 + atoms_count: 4 + atoms_count + bonds_count]: a1, a2 = int(line[0:3]) - 1, int(line[3:6]) - 1 @@ -137,7 +122,7 @@ def parse_mol_v2000(data): log.append(f'ignored line: {line}') for a in atoms: - if a['is_radical']: # int to bool + if 'is_radical' in a: # int to bool a['is_radical'] = True for x in dat.values(): try: @@ -147,14 +132,13 @@ def parse_mol_v2000(data): value = x['value'] if len(_atoms) != 1 or _atoms[0] == -1 or not value: raise InvalidV2000(f'MRV_IMPLICIT_H spec invalid {x}') - hydrogens[_atoms[0]] = int(value[6:]) + atoms[_atoms[0]]['implicit_hydrogens'] = int(value[6:]) else: log.append(f'ignored data: {x}') except KeyError: raise InvalidV2000(f'Invalid SGROUP {x}') - return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'hydrogens': hydrogens, - 'meta': None, 'log': log} + return {'title': title, 'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'log': log} -__all__ = ['parse_mol_v2000', 'common_isotopes'] +__all__ = ['parse_mol_v2000'] diff --git a/chython/files/_mdl/read.py b/chython/files/mdl/read.py similarity index 100% rename from chython/files/_mdl/read.py rename to 
chython/files/mdl/read.py diff --git a/chython/files/_mdl/rxn.py b/chython/files/mdl/rxn.py similarity index 94% rename from chython/files/_mdl/rxn.py rename to chython/files/mdl/rxn.py index d81ee459..56977fe1 100644 --- a/chython/files/_mdl/rxn.py +++ b/chython/files/mdl/rxn.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -29,7 +29,7 @@ def parse_rxn_v2000(data, *, ignore=True): if not reagents_count: raise EmptyReaction - title = data[2].strip() or None + title = data[1].strip() or None log = [] molecules = [] @@ -61,7 +61,7 @@ def parse_rxn_v2000(data, *, ignore=True): reagents_count -= 1 return {'reactants': molecules[:reactants_count], 'products': molecules[reactants_count:products_count], - 'reagents': molecules[products_count:], 'title': title, 'meta': None, 'log': log} + 'reagents': molecules[products_count:], 'title': title, 'log': log} __all__ = ['parse_rxn_v2000'] diff --git a/chython/files/mdl/stereo.py b/chython/files/mdl/stereo.py new file mode 100644 index 00000000..212cb77d --- /dev/null +++ b/chython/files/mdl/stereo.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2024 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from ...exceptions import NotChiral, IsChiral, ValenceError + + +def postprocess_molecule(molecule, data, *, ignore_stereo=False, calc_cis_trans=False): + if ignore_stereo: + return + mapping = data['mapping'] + log = [] + + if calc_cis_trans: + molecule.calculate_cis_trans_from_2d(clean_cache=False) + + stereo = [(mapping[n], mapping[m], s) for n, m, s in data['stereo']] + while stereo: + fail_stereo = [] + old_stereo = len(stereo) + for n, m, s in stereo: + try: + molecule.add_wedge(n, m, s, clean_cache=False) + except NotChiral: + fail_stereo.append((n, m, s)) + except IsChiral: + pass + except ValenceError: + log.append('structure has errors, stereo data skipped') + molecule.flush_cache() + break + else: + stereo = fail_stereo + if len(stereo) == old_stereo: + break + molecule.flush_stereo_cache() + if calc_cis_trans: + molecule.calculate_cis_trans_from_2d(clean_cache=False) + continue + break + + if log: + if 'chython_parsing_log' not in molecule.meta: + molecule.meta['chython_parsing_log'] = log + else: + molecule.meta['chython_parsing_log'].extend(log) + + +__all__ = ['postprocess_molecule'] diff --git a/chython/files/_mdl/write.py b/chython/files/mdl/write.py similarity index 82% rename from chython/files/_mdl/write.py rename to chython/files/mdl/write.py index c6bfc1bd..a998251e 100644 --- a/chython/files/_mdl/write.py +++ b/chython/files/mdl/write.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -77,24 +77,20 @@ def _write_molecule(self, g, write3d=None): else: z = 0 - gc = g._charges - gr = g._radicals - gp = g._plane - gb = g._bonds + bonds = g._bonds file = self._file file.write(f'M V30 BEGIN CTAB\nM V30 COUNTS {g.atoms_count} {g.bonds_count} 0 0 0\nM V30 BEGIN ATOM\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (m, a) in enumerate(g.atoms(), start=1): if write3d is not None: x, y, z = xyz[m] z = f'{z:.4f}' else: - x, y = gp[m] + x, y = a.x, a.y - c = gc[m] - c = f' CHG={c}' if c else '' - r = ' RAD=2' if gr[m] else '' + c = f' CHG={a.charge}' if a.charge else '' + r = ' RAD=2' if a.is_radical else '' i = f' MASS={a.isotope}' if a.isotope else '' if not self._mapping: @@ -107,7 +103,7 @@ def _write_molecule(self, g, write3d=None): wedge = defaultdict(set) i = 0 # trick for empty wedge_map for i, (n, m, s) in enumerate(g._wedge_map, start=1): - file.write(f'M V30 {i} {gb[n][m].order} {mapping[n]} {mapping[m]} CFG={s == 1 and "1" or "3"}\n') + file.write(f'M V30 {i} {bonds[n][m].order} {mapping[n]} {mapping[m]} CFG={s == 1 and "1" or "3"}\n') wedge[n].add(m) wedge[m].add(n) @@ -130,43 +126,39 @@ def _write_molecule(self, g, write3d=None): else: z = 0. 
- gc = g._charges - gr = g._radicals - gp = g._plane - gb = g._bonds + bonds = g._bonds file = self._file file.write(f'{g.name}\n\n\n{g.atoms_count:3d}{g.bonds_count:3d} 0 0 0 0 999 V2000\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (m, a) in enumerate(g.atoms(), start=1): if write3d is not None: x, y, z = xyz[m] else: - x, y = gp[m] + x, y = a.x, a.y - c = charge_map[gc[m]] + c = charge_map[a.charge] if not self._mapping: m = 0 file.write(f'{x:10.4f}{y:10.4f}{z:10.4f} {a.atomic_symbol:3s} 0{c} 0 0 0 0 0 0 0{m:3d} 0 0\n') - atoms = {m: n for n, m in enumerate(g._atoms, start=1)} + atoms = {m: n for n, m in enumerate(g, start=1)} wedge = defaultdict(set) for n, m, s in g._wedge_map: - file.write(f'{atoms[n]:3d}{atoms[m]:3d} {gb[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') + file.write(f'{atoms[n]:3d}{atoms[m]:3d} {bonds[n][m].order} {s == 1 and "1" or "6"} 0 0 0\n') wedge[n].add(m) wedge[m].add(n) for n, m, b in g.bonds(): if m not in wedge[n]: file.write(f'{atoms[n]:3d}{atoms[m]:3d} {b.order} 0 0 0 0\n') - for n, (m, a) in enumerate(g._atoms.items(), start=1): + for n, (_, a) in enumerate(g.atoms(), start=1): if a.isotope: file.write(f'M ISO 1 {n:3d} {a.isotope:3d}\n') - if gr[m]: + if a.is_radical: file.write(f'M RAD 1 {n:3d} 2\n') # invalid for carbenes - c = gc[m] - if c in (-4, 4): - file.write(f'M CHG 1 {n:3d} {c:3d}\n') + if a.charge in (-4, 4): + file.write(f'M CHG 1 {n:3d} {a.charge:3d}\n') file.write('M END\n') diff --git a/chython/files/xyz.py b/chython/files/xyz.py index 42ec82e7..a77a8489 100644 --- a/chython/files/xyz.py +++ b/chython/files/xyz.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -31,28 +31,21 @@ def xyz(matrix: Sequence[Tuple[str, float, float, float]], charge=0, radical=0, mol = _cls() conformer = {} - mol._conformers.append(conformer) + mol._conformers = [conformer] atoms = mol._atoms bonds = mol._bonds - plane = mol._plane - hydrogens = mol._hydrogens - radicals = mol._radicals for n, (a, x, y, z) in enumerate(matrix, 1): - atoms[n] = atom = Element.from_symbol(a)() - atom._attach_graph(mol, n) + atoms[n] = Element.from_symbol(a)(x=x, y=y, implicit_hydrogens=0) bonds[n] = {} - plane[n] = (x, y) conformer[n] = (x, y, z) - hydrogens[n] = 0 # implicit hydrogens not supported. - radicals[n] = False # set default value - if atom_charge is None or None in atom_charge: - mol._charges = {n: 0 for n in atoms} # reset charges - else: - mol._charges = dict(enumerate(atom_charge, 1)) + if atom_charge is not None and None not in atom_charge: + for n, c in enumerate(atom_charge, 1): + atoms[n]._charge = c charge = sum(atom_charge) + mol.calc_labels() pb = possible_bonds(array(list(conformer.values())), array([a.atomic_radius for a in atoms.values()]), radius_multiplier) diff --git a/chython/periodictable/__init__.py b/chython/periodictable/__init__.py index 304f6e44..d494564e 100644 --- a/chython/periodictable/__init__.py +++ b/chython/periodictable/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2018-2021 Ramil Nugmanov +# Copyright 2018-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -17,9 +17,9 @@ # along with this program; if not, see . 
# from abc import ABCMeta -from .element import * -from .groups import * -from .periods import * +from .base import * +from .base.groups import * +from .base.periods import * from .groupI import * from .groupII import * from .groupIII import * @@ -39,6 +39,7 @@ from .groupXVII import * from .groupXVIII import * + modules = {v.__name__: v for k, v in globals().items() if k.startswith('group') and k != 'groups'} elements = {k: v for k, v in globals().items() if isinstance(v, ABCMeta) and k != 'Element' and issubclass(v, Element)} @@ -48,12 +49,21 @@ __all__.extend(elements) -for _class in (DynamicElement, QueryElement): - for k, v in elements.items(): - name = f'{_class.__name__[:-7]}{k}' - globals()[name] = cls = type(name, (_class, *v.__mro__[-3:-1]), - {'__module__': v.__module__, '__slots__': (), 'atomic_number': v.atomic_number, - 'atomic_radius': v.atomic_radius}) - setattr(modules[v.__module__], name, cls) - modules[v.__module__].__all__.append(name) - __all__.append(name) +for k, v in elements.items(): + name = f'Dynamic{k}' + globals()[name] = cls = type(name, (DynamicElement,), + {'__module__': v.__module__, '__slots__': (), + 'atomic_number': v.atomic_number}) + setattr(modules[v.__module__], name, cls) + modules[v.__module__].__all__.append(name) + __all__.append(name) + +for k, v in elements.items(): + name = f'Query{k}' + globals()[name] = cls = type(name, (QueryElement,), + {'__module__': v.__module__, '__slots__': (), + 'atomic_number': v.atomic_number, + 'mdl_isotope': v.mdl_isotope}) + setattr(modules[v.__module__], name, cls) + modules[v.__module__].__all__.append(name) + __all__.append(name) diff --git a/chython/periodictable/element/__init__.py b/chython/periodictable/base/__init__.py similarity index 71% rename from chython/periodictable/element/__init__.py rename to chython/periodictable/base/__init__.py index 1fecc8f4..75806828 100644 --- a/chython/periodictable/element/__init__.py +++ b/chython/periodictable/base/__init__.py @@ -1,8 +1,6 @@ # 
-*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov -# Copyright 2019 Tagir Akhmetshin -# Copyright 2019 Dayana Bashirova +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,10 +16,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .core import * +from .dynamic import * from .element import * from .query import * -from .dynamic import * -__all__ = ['Core', 'Element', 'DynamicElement', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] +__all__ = ['Element', 'DynamicElement', 'Query', 'ExtendedQuery', + 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/element/dynamic.py b/chython/periodictable/base/dynamic.py similarity index 51% rename from chython/periodictable/element/dynamic.py rename to chython/periodictable/base/dynamic.py index 70aaaabd..c7af1a7a 100644 --- a/chython/periodictable/element/dynamic.py +++ b/chython/periodictable/base/dynamic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2022 Ramil Nugmanov +# Copyright 2020-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,20 +16,34 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from abc import ABC -from typing import Type, Union -from .core import Core +from abc import ABC, abstractmethod +from typing import Type, Optional from .element import Element -from ...exceptions import IsNotConnectedAtom -class DynamicElement(Core, ABC): - __slots__ = ('__p_charge', '__p_is_radical') +class DynamicElement(ABC): + __slots__ = ('_charge', '_is_radical', '_p_charge', '_p_is_radical', '_isotope') + + def __init__(self, isotope: Optional[int]): + self._isotope = isotope + self._charge = self._p_charge = 0 + self._is_radical = self._p_is_radical = False + + @property + def isotope(self): + return self._isotope @property def atomic_symbol(self) -> str: return self.__class__.__name__[7:] + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + @classmethod def from_symbol(cls, symbol: str) -> Type['DynamicElement']: """ @@ -53,29 +67,52 @@ def from_atomic_number(cls, number: int) -> Type['DynamicElement']: return element @classmethod - def from_atom(cls, atom: Union['Element', 'DynamicElement']) -> 'DynamicElement': + def from_atom(cls, atom: 'Element') -> 'DynamicElement': """ - get DynamicElement object from Element object or copy of DynamicElement object + get DynamicElement object from Element object """ - if isinstance(atom, Element): - return cls.from_atomic_number(atom.atomic_number)(atom.isotope) - elif not isinstance(atom, DynamicElement): - raise TypeError('Element or DynamicElement expected') - return atom.copy() + if not isinstance(atom, Element): + raise TypeError('Element expected') + dynamic = object.__new__(cls.from_atomic_number(atom.atomic_number)) + dynamic._isotope = atom.isotope + dynamic._charge = dynamic._p_charge = atom.charge + dynamic._is_radical = dynamic._p_is_radical = atom.is_radical + return dynamic + + @classmethod + def from_atoms(cls, atom1: 'Element', atom2: 'Element') -> 'DynamicElement': + """ + get DynamicElement object from pair of Element objects + """ + if not 
isinstance(atom1, Element) or not isinstance(atom2, Element): + raise TypeError('Element expected') + if atom1.atomic_number != atom2.atomic_number: + raise ValueError('elements should be of the same type') + if atom1.isotope != atom2.isotope: + raise ValueError('elements should be of the same isotope') + dynamic = object.__new__(cls.from_atomic_number(atom1.atomic_number)) + dynamic._isotope = atom1.isotope + dynamic._charge = atom1.charge + dynamic._p_charge = atom2.charge + dynamic._is_radical = atom1.is_radical + dynamic._p_is_radical = atom2.is_radical + return dynamic + + @property + def charge(self) -> int: + return self._charge + + @property + def is_radical(self) -> bool: + return self._is_radical @property def p_charge(self) -> int: - try: - return self._graph()._p_charges[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._p_charge @property def p_is_radical(self) -> bool: - try: - return self._graph()._p_radicals[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._p_is_radical def __eq__(self, other): """ @@ -96,5 +133,17 @@ def is_dynamic(self) -> bool: """ return self.charge != self.p_charge or self.is_radical != self.p_is_radical + def copy(self): + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._p_is_radical = self.p_is_radical + copy._p_charge = self.p_charge + return copy + + def __copy__(self): + return self.copy() + __all__ = ['DynamicElement'] diff --git a/chython/periodictable/element/element.py b/chython/periodictable/base/element.py similarity index 62% rename from chython/periodictable/element/element.py rename to chython/periodictable/base/element.py index 22a28386..8fdf99c8 100644 --- a/chython/periodictable/element/element.py +++ b/chython/periodictable/base/element.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020-2023 Ramil Nugmanov +# Copyright 2020-2025 Ramil Nugmanov # This file 
is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -20,31 +20,70 @@ from CachedMethods import class_cached_property from collections import defaultdict from typing import Dict, List, Optional, Set, Tuple, Type -from .core import Core -from ...exceptions import IsNotConnectedAtom, ValenceError +from .vector import Vector +from ...exceptions import ValenceError -class Element(Core, ABC): - __slots__ = () +class Element(ABC): + __slots__ = ('_isotope', '_charge', '_is_radical', '_xy', '_implicit_hydrogens', + '_explicit_hydrogens', '_stereo', '_parsed_mapping', + '_neighbors', '_heteroatoms', '_hybridization', '_ring_sizes', '_in_ring') __class_cache__ = {} - def __init__(self, isotope: Optional[int] = None): + def __init__(self, isotope: Optional[int] = None, *, + charge: int = 0, is_radical: bool = False, x: float = 0., y: float = 0., + implicit_hydrogens: Optional[int] = None, stereo: Optional[bool] = None, + parsed_mapping: Optional[int] = None, delta_isotope: Optional[int] = None): """ Element object with specified isotope :param isotope: Isotope number of element """ - if isinstance(isotope, int): - if isotope not in self.isotopes_distribution: - raise ValueError(f'isotope number {isotope} impossible or not stable for {self.atomic_symbol}') - elif isotope is not None: - raise TypeError('integer isotope number required') - super().__init__(isotope) + if delta_isotope is not None: + assert isotope is None, 'isotope absolute value and delta value provided' + isotope = self.mdl_isotope + delta_isotope + + self.isotope = isotope + self.charge = charge + self.is_radical = is_radical + self._xy = Vector(x, y) + + self._implicit_hydrogens = implicit_hydrogens + self._stereo = stereo + self._parsed_mapping = parsed_mapping + + def __repr__(self): + if self.isotope: + return f'{self.__class__.__name__}({self.isotope})' + return f'{self.__class__.__name__}()' @property def atomic_symbol(self) -> str: return self.__class__.__name__ + 
@property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + + @property + def isotope(self) -> Optional[int]: + """ + Isotope number + """ + return self._isotope + + @isotope.setter + def isotope(self, value: Optional[int]): + if isinstance(value, int): + if value not in self.isotopes_distribution: + raise ValueError(f'isotope number {value} impossible or not stable for {self.atomic_symbol}') + elif value is not None: + raise TypeError('integer isotope number required') + self._isotope = value + @property def atomic_mass(self) -> float: mass = self.isotopes_masses @@ -73,103 +112,133 @@ def atomic_radius(self) -> float: Valence radius of atom """ - @Core.charge.setter - def charge(self, charge: int): - if not isinstance(charge, int): + @property + @abstractmethod + def mdl_isotope(self) -> int: + """ + MDL MOL common isotope + """ + + @property + def is_forming_single_bonds(self) -> bool: + """ + Atom can form stable covalent single bonds in molecules + """ + return False + + @property + def is_forming_double_bonds(self) -> bool: + """ + Atom can form stable covalent double bonds in molecules + """ + return False + + @property + def charge(self) -> int: + """ + Charge of atom + """ + return self._charge + + @charge.setter + def charge(self, value: int): + """ + Update charge of atom. Make sure to flush cache and recalculate hydrogens count and stereo. 
+ Or use context manager on molecule: + + with mol: + mol.atom(1).charge = 1 + """ + if not isinstance(value, int): raise TypeError('formal charge should be int in range [-4, 4]') - elif charge > 4 or charge < -4: + elif value > 4 or value < -4: raise ValueError('formal charge should be in range [-4, 4]') - try: - g = self._graph() - g._charges[self._n] = charge - except AttributeError: - raise IsNotConnectedAtom - else: - g._calc_implicit(self._n) - g.flush_cache() - g.fix_stereo() + self._charge = value - @Core.is_radical.setter - def is_radical(self, is_radical: bool): - if not isinstance(is_radical, bool): + @property + def is_radical(self) -> bool: + """ + Radical state of atoms + """ + return self._is_radical + + @is_radical.setter + def is_radical(self, value: bool): + """ + Update radical state of atom. Make sure to flush cache and recalculate hydrogens count and stereo. + Or use context manager on molecule: + + with mol: + mol.atom(1).is_radical = True + """ + if not isinstance(value, bool): raise TypeError('bool expected') - try: - g = self._graph() - g._radicals[self._n] = is_radical - except AttributeError: - raise IsNotConnectedAtom - else: - g._calc_implicit(self._n) - g.flush_cache() - g.fix_stereo() + self._is_radical = value @property def x(self) -> float: """ X coordinate of atom on 2D plane """ - try: - return self._graph()._plane[self._n][0] - except AttributeError: - raise IsNotConnectedAtom + return self._xy.x + + @x.setter + def x(self, value: float): + self._xy.x = value @property def y(self) -> float: """ Y coordinate of atom on 2D plane """ - try: - return self._graph()._plane[self._n][1] - except AttributeError: - raise IsNotConnectedAtom + return self._xy.y + + @y.setter + def y(self, value: float): + self._xy.y = value @property - def xy(self) -> Tuple[float, float]: + def xy(self) -> Vector: """ (X, Y) coordinates of atom on 2D plane """ - try: - return self._graph()._plane[self._n] - except AttributeError: - raise IsNotConnectedAtom + 
return self._xy + + @xy.setter + def xy(self, value: Tuple[float, float]): + self._xy = Vector(*value) @property def implicit_hydrogens(self) -> Optional[int]: - try: - return self._graph()._hydrogens[self._n] - except AttributeError: - raise IsNotConnectedAtom + return self._implicit_hydrogens @property def explicit_hydrogens(self) -> int: - try: - return self._graph().explicit_hydrogens(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._explicit_hydrogens @property def total_hydrogens(self) -> int: - try: - return self._graph().total_hydrogens(self._n) - except AttributeError: - raise IsNotConnectedAtom + if self.implicit_hydrogens is None: + raise ValenceError + return self.implicit_hydrogens + self.explicit_hydrogens + + @property + def stereo(self) -> Optional[bool]: + """ + Tetrahedron or allene stereo label + """ + return self._stereo @property def heteroatoms(self) -> int: - try: - return self._graph().heteroatoms(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._heteroatoms @property def neighbors(self) -> int: """ Neighbors count of atom """ - try: - return self._graph().neighbors(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._neighbors @property def hybridization(self): @@ -178,32 +247,53 @@ def hybridization(self): of single bonded, 3 - if has one triple bonded and any amount of double and single bonded neighbors or two double bonded and any amount of single bonded neighbors, 4 - if atom in aromatic ring. """ - try: - return self._graph().hybridization(self._n) - except AttributeError: - raise IsNotConnectedAtom + return self._hybridization @property - def ring_sizes(self) -> Tuple[int, ...]: + def ring_sizes(self) -> Set[int]: """ Atom rings sizes. """ - try: - return self._graph().atoms_rings_sizes[self._n] - except AttributeError: - raise IsNotConnectedAtom - except KeyError: - return () + return self._ring_sizes @property def in_ring(self) -> bool: """ Atom in any ring. 
""" - try: - return self._n in self._graph().ring_atoms - except AttributeError: - raise IsNotConnectedAtom + return self._in_ring + + def copy(self, full=False, hydrogens=False, stereo=False) -> 'Element': + """ + Get a copy of the Element object with attribute copy control. + """ + copy = object.__new__(self.__class__) + copy._isotope = self.isotope + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._xy = self.xy + if full: + copy._implicit_hydrogens = self.implicit_hydrogens + copy._stereo = self.stereo + copy._explicit_hydrogens = self.explicit_hydrogens + copy._neighbors = self.neighbors + copy._heteroatoms = self.heteroatoms + copy._hybridization = self.hybridization + copy._ring_sizes = self.ring_sizes.copy() + copy._in_ring = self.in_ring + else: + if hydrogens: + copy._implicit_hydrogens = self.implicit_hydrogens + else: + copy._implicit_hydrogens = None + if stereo: + copy._stereo = self.stereo + else: + copy._stereo = None + return copy + + def __copy__(self): + return self.copy() @classmethod def from_symbol(cls, symbol: str) -> Type['Element']: @@ -231,32 +321,28 @@ def from_atomic_number(cls, number: int) -> Type['Element']: except KeyError: raise ValueError(f'Element with number "{number}" not found') - @classmethod - def from_atom(cls, atom: 'Element') -> 'Element': - """ - get Element copy - """ - if not isinstance(atom, Element): - raise TypeError('Element expected') - return atom.copy() - def __eq__(self, other): """ compare attached to molecules elements """ + if isinstance(other, int): + return self.atomic_number == other + elif isinstance(other, str): + return self.atomic_symbol == other return isinstance(other, Element) and self.atomic_number == other.atomic_number and \ self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical def __hash__(self): - return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.implicit_hydrogens or 0)) + return 
hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, + self.implicit_hydrogens or 0, self.in_ring)) - def valence_rules(self, charge: int, is_radical: bool, valence: int) -> \ + def valence_rules(self, valence: int) -> \ List[Tuple[Set[Tuple[int, 'Element']], Dict[Tuple[int, 'Element'], int], int]]: """ valence rules for element with specific charge/radical state """ try: - return self._compiled_valence_rules[(charge, is_radical, valence)] + return self._compiled_valence_rules[(self.charge, self.is_radical, valence)] except KeyError: raise ValenceError diff --git a/chython/periodictable/groups.py b/chython/periodictable/base/groups.py similarity index 95% rename from chython/periodictable/groups.py rename to chython/periodictable/base/groups.py index 912c9ae3..75809c61 100644 --- a/chython/periodictable/groups.py +++ b/chython/periodictable/base/groups.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/periods.py b/chython/periodictable/base/periods.py similarity index 93% rename from chython/periodictable/periods.py rename to chython/periodictable/base/periods.py index 2f3e6cba..f05e6d08 100644 --- a/chython/periodictable/periods.py +++ b/chython/periodictable/base/periods.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify diff --git a/chython/periodictable/base/query.py b/chython/periodictable/base/query.py new file mode 100644 index 00000000..def97785 --- /dev/null +++ b/chython/periodictable/base/query.py @@ -0,0 +1,473 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020-2025 Ramil Nugmanov +# Copyright 2021 Dmitrij Zanadvornykh +# This file is part of chython. 
+# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from abc import ABC, abstractmethod +from functools import cached_property +from typing import Tuple, Type, List, Union, Optional +from .element import Element +from .groups import GroupXVIII + + +def _validate(value, prop): + if value is None: + return () + elif isinstance(value, int): + if value < 0 or value > 14: + raise ValueError(f'{prop} should be in range [0, 14]') + return (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(x, int) for x in value): + raise TypeError(f'{prop} should be list or tuple of ints') + if any(x < 0 or x > 14 for x in value): + raise ValueError(f'{prop} should be in range [0, 14]') + if len(set(value)) != len(value): + raise ValueError(f'{prop} should be unique') + return tuple(sorted(value)) + else: + raise TypeError(f'{prop} should be int or list or tuple of ints') + + +class Query(ABC): + __slots__ = ('_neighbors', '_hybridization', '_masked') + + def __init__(self, neighbors: Union[int, Tuple[int, ...], None] = None, + hybridization: Union[int, Tuple[int, ...], None] = None, masked: bool = False): + self.neighbors = neighbors + self.hybridization = hybridization + self.masked = masked + + @property + @abstractmethod + def atomic_symbol(self) -> str: + ... 
+ + @property + def neighbors(self) -> Tuple[int, ...]: + return self._neighbors + + @neighbors.setter + def neighbors(self, value): + self._neighbors = _validate(value, 'neighbors') + + @property + def hybridization(self) -> Tuple[int, ...]: + return self._hybridization + + @hybridization.setter + def hybridization(self, value): + if value is None: + self._hybridization = () + elif isinstance(value, int): + if value < 1 or value > 4: + raise ValueError('hybridization should be in range [1, 4]') + self._hybridization = (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(h, int) for h in value): + raise TypeError('hybridizations should be list or tuple of ints') + if any(h < 1 or h > 4 for h in value): + raise ValueError('hybridizations should be in range [1, 4]') + if len(set(value)) != len(value): + raise ValueError('hybridizations should be unique') + self._hybridization = tuple(sorted(value)) + else: + raise TypeError('hybridization should be int or list or tuple of ints') + + @property + def masked(self): + return self._masked + + @masked.setter + def masked(self, value): + if not isinstance(value, bool): + raise TypeError('masked should be bool') + self._masked = value + + def copy(self, full=False): + copy = object.__new__(self.__class__) + copy._neighbors = self.neighbors + copy._hybridization = self.hybridization + + copy._masked = self.masked if full else False + return copy + + def __copy__(self): + return self.copy() + + def __repr__(self): + return f'{self.__class__.__name__}()' + + +class ExtendedQuery(Query, ABC): + __slots__ = ('_charge', '_is_radical', '_heteroatoms', '_ring_sizes', '_implicit_hydrogens', '_stereo') + + def __init__(self, charge: int = 0, is_radical: bool = False, heteroatoms: Union[int, Tuple[int, ...], None] = None, + ring_sizes: Union[int, Tuple[int, ...], None] = None, + implicit_hydrogens: Union[int, Tuple[int, ...], None] = None, stereo: Optional[bool] = None, **kwargs): + super().__init__(**kwargs) + 
self.charge = charge + self.is_radical = is_radical + self.heteroatoms = heteroatoms + self.ring_sizes = ring_sizes + self.implicit_hydrogens = implicit_hydrogens + self.stereo = stereo + + @property + def charge(self) -> int: + """ + Charge of atom + """ + return self._charge + + @charge.setter + def charge(self, value: int): + if not isinstance(value, int): + raise TypeError('formal charge should be int in range [-4, 4]') + elif value > 4 or value < -4: + raise ValueError('formal charge should be in range [-4, 4]') + self._charge = value + + @property + def is_radical(self) -> bool: + """ + Radical state of atoms + """ + return self._is_radical + + @is_radical.setter + def is_radical(self, value: bool): + if not isinstance(value, bool): + raise TypeError('bool expected') + self._is_radical = value + + @property + def heteroatoms(self) -> Tuple[int, ...]: + return self._heteroatoms + + @heteroatoms.setter + def heteroatoms(self, value): + self._heteroatoms = _validate(value, 'heteroatoms') + + @property + def implicit_hydrogens(self) -> Tuple[int, ...]: + return self._implicit_hydrogens + + @implicit_hydrogens.setter + def implicit_hydrogens(self, value): + self._implicit_hydrogens = _validate(value, 'implicit hydrogens') + + @property + def ring_sizes(self) -> Tuple[int, ...]: + """ + Atom rings sizes. + """ + return self._ring_sizes + + @ring_sizes.setter + def ring_sizes(self, value): + if value is None: + self._ring_sizes = () + elif isinstance(value, int): + if value < 3 and value != 0: + raise ValueError('rings should be greater or equal 3. 
ring equal to zero is no ring atom mark') + self._ring_sizes = (value,) + elif isinstance(value, (tuple, list)): + if not all(isinstance(x, int) for x in value): + raise TypeError('rings should be list or tuple of ints') + if any(x < 3 for x in value): + raise ValueError('rings should be greater or equal 3') + if len(set(value)) != len(value): + raise ValueError('rings should be unique') + self._ring_sizes = tuple(sorted(value)) + else: + raise TypeError('rings should be int or list or tuple of ints') + + @property + def stereo(self): + return self._stereo + + @stereo.setter + def stereo(self, value: Optional[bool]): + if value is not None and not isinstance(value, bool): + raise TypeError('stereo should be bool') + self._stereo = value + + def copy(self, full=False): + copy = super().copy(full=full) + copy._charge = self.charge + copy._is_radical = self.is_radical + copy._heteroatoms = self.heteroatoms + copy._implicit_hydrogens = self.implicit_hydrogens + copy._ring_sizes = self.ring_sizes + + copy._stereo = self.stereo if full else None + return copy + + +class AnyMetal(Query): + """ + Charge and radical ignored any metal. Rings, hydrogens and heteroatoms count also ignored. + + Class designed for d-elements matching in standardization. 
+ """ + __slots__ = () + + @property + def atomic_symbol(self) -> str: + return 'M' + + def __eq__(self, other): + if not isinstance(other, Element): + return False + if other.is_forming_single_bonds or isinstance(other, GroupXVIII): + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + return True + + +class AnyElement(ExtendedQuery): + __slots__ = () + + @property + def atomic_symbol(self) -> str: + return 'A' + + def __eq__(self, other): + """ + Compare attached to molecules elements and query elements + """ + if not isinstance(other, Element): + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + + +class ListElement(ExtendedQuery): + __slots__ = ('_elements', '__dict__') + + def __init__(self, elements: List[str], **kwargs): + """ + Elements list + """ + if not isinstance(elements, (list, tuple)) or not elements: + raise ValueError('invalid elements list') + tmp = [] + for x in elements: + if isinstance(x, int): + tmp.append(Element.from_atomic_number(x).__name__) + elif isinstance(x, str): + tmp.append(Element.from_symbol(x).__name__) + else: + raise ValueError(f'invalid element: {x}') + super().__init__(**kwargs) + self._elements = tuple(tmp) + + @property + def atomic_symbol(self) -> str: + return ','.join(self._elements) + 
+ @cached_property + def atomic_numbers(self): + return tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in self._elements) + + def copy(self, full=False): + copy = super().copy(full=full) + copy._elements = self._elements + return copy + + def __eq__(self, other): + """ + Compare attached to molecules elements and query elements + """ + if not isinstance(other, Element): + return False + if other.atomic_number not in self.atomic_numbers: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + + def __repr__(self): + return f'{self.__class__.__name__}([{self.atomic_symbol}])' + + +class QueryElement(ExtendedQuery, ABC): + __slots__ = ('_isotope',) + + def __init__(self, isotope: Optional[int] = None, **kwargs): + super().__init__(**kwargs) + self.isotope = isotope + + def __repr__(self): + if self.isotope: + return f'{self.__class__.__name__}({self.isotope})' + return f'{self.__class__.__name__}()' + + @property + def atomic_symbol(self) -> str: + return self.__class__.__name__[5:] + + @property + @abstractmethod + def atomic_number(self) -> int: + """ + Element number + """ + + @property + def isotope(self): + return self._isotope + + @isotope.setter + def isotope(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise TypeError('isotope must be an int') + self._isotope = value + + @property + 
@abstractmethod + def mdl_isotope(self) -> int: + ... + + @classmethod + def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: + """ + get Element class by its symbol + """ + if symbol == 'A': + return AnyElement + elif symbol == 'M': + return AnyMetal + try: + element = next(x for x in QueryElement.__subclasses__() if x.__name__ == f'Query{symbol}') + except StopIteration: + raise ValueError(f'QueryElement with symbol "{symbol}" not found') + return element + + @classmethod + def from_atomic_number(cls, number: int) -> Type['QueryElement']: + """ + get Element class by its number + """ + try: + element = next(x for x in QueryElement.__subclasses__() if x.atomic_number.fget(None) == number) + except StopIteration: + raise ValueError(f'QueryElement with number "{number}" not found') + return element + + @classmethod + def from_atom(cls, atom: 'Element', neighbors=False, hybridization=False, heteroatoms=False, + hydrogens=False, ring_sizes=False, stereo=False) -> 'QueryElement': + """ + get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement + """ + if not isinstance(atom, Element): + raise TypeError('Element or Query expected') + + # transfer true atomic props + query = cls.from_atomic_number(atom.atomic_number)(atom.isotope) + query._charge = atom.charge + query._is_radical = atom.is_radical + + if neighbors: + query._neighbors = (atom.neighbors,) + if hybridization: + query._hybridization = (atom.hybridization,) + if heteroatoms: + query._heteroatoms = (atom.heteroatoms,) + if ring_sizes: + query._ring_sizes = atom.ring_sizes + if hydrogens and atom.implicit_hydrogens is not None: + query._implicit_hydrogens = (atom.implicit_hydrogens,) + if stereo: + query._stereo = atom.stereo + return query + + def copy(self, full=False): + copy = super().copy(full=full) + copy._isotope = self.isotope + return copy + + def __eq__(self, other): + """ + compare attached to molecules elements and query 
elements + """ + if not isinstance(other, Element): + return False + if self.atomic_number != other.atomic_number: + return False + if self.charge != other.charge: + return False + if self.is_radical != other.is_radical: + return False + if self.isotope and self.isotope != other.isotope: + return False + if self.neighbors and other.neighbors not in self.neighbors: + return False + if self.hybridization and other.hybridization not in self.hybridization: + return False + if self.ring_sizes: + if self.ring_sizes[0]: + if other.ring_sizes.isdisjoint(self.ring_sizes): + return False + elif other.ring_sizes: # not in ring expected + return False + if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: + return False + if self.heteroatoms and other.heteroatoms not in self.heteroatoms: + return False + return True + + +__all__ = ['Query', 'ExtendedQuery', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/base/vector.py b/chython/periodictable/base/vector.py new file mode 100644 index 00000000..c23d2773 --- /dev/null +++ b/chython/periodictable/base/vector.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2024 Denis Lipatov +# Copyright 2024 Vyacheslav Grigorev +# Copyright 2024 Timur Gimadiev +# Copyright 2024, 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from math import cos, sin, hypot, atan2 + + +class Vector: + __slots__ = ('x', 'y') + + def __init__(self, x: float = 0., y: float = 0.): + self.x = x + self.y = y + + def __repr__(self): + return f'Vector({self.x}, {self.y})' + + def __neg__(self): + """ + A class method that inverts the current coordinates of objects of the class + """ + return Vector(-self.x, -self.y) + + def __sub__(self, vector: 'Vector'): + """ + A method for the operation of subtraction between vectors + """ + return Vector(self.x - vector.x, self.y - vector.y) + + def __add__(self, vector: 'Vector'): + """ + A method for the operation of addition between vectors + """ + return Vector(self.x + vector.x, self.y + vector.y) + + def __truediv__(self, scalar: float): + """ + A class method that divides the coordinates of the vector by a given scalar + """ + return Vector(self.x / scalar, self.y / scalar) + + def __mul__(self, scalar: float): + """ + Multiplies the coordinates of the current vector by an arbitrary real number + """ + return Vector(self.x * scalar, self.y * scalar) + + def __float__(self): + """ + Calculates the length of the current vector + + Returns float + """ + return hypot(self.x, self.y) + + def __iter__(self): + yield self.x + yield self.y + + def __len__(self): + return 2 + + def __matmul__(self, vector: 'Vector'): + return self.x * vector.y - self.y * vector.x + + def __or__(self, vector: 'Vector'): + """ + Calculate distance between two vectors + """ + return hypot(vector.x - self.x, vector.y - self.y) + + def rotate(self, angle: float, vector: 'Vector' = None): + """ + A method that rotates the vector by the angle in radians + """ + c = cos(angle) + s = sin(angle) + if vector is None: + return Vector(self.x * c - self.y * s, self.x * s + self.y * c) + xy = self - vector + return vector + Vector(xy.x * c - xy.y * s, xy.x * s + xy.y * c) + + def normalise(self): + """ + Normalization of coordinates (dividing them by the length of the vector itself) + """ + if ln := 
float(self): + return self / ln + return self + + def angle(self, vector: 'Vector' = None) -> float: + """ + A method calculates the angle of inclination of the current vector + or the vector between given vector and the current vector. + """ + if vector is None: + return atan2(self.y, self.x) + else: + return atan2(vector.y - self.y, vector.x - self.x) + + +__all__ = ['Vector'] diff --git a/chython/periodictable/element/core.py b/chython/periodictable/element/core.py deleted file mode 100644 index f5ab05ca..00000000 --- a/chython/periodictable/element/core.py +++ /dev/null @@ -1,118 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2022 Ramil Nugmanov -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from abc import ABC, abstractmethod -from typing import Optional, TypeVar -from weakref import ref -from ...exceptions import IsConnectedAtom, IsNotConnectedAtom - - -T = TypeVar('T') - - -class Core(ABC): - __slots__ = ('__isotope', '_graph', '_n') - - def __init__(self, isotope: Optional[int] = None): - self.__isotope = isotope - - def __repr__(self): - if self.__isotope: - return f'{self.__class__.__name__}({self.__isotope})' - return f'{self.__class__.__name__}()' - - def __getstate__(self): - return {'isotope': self.__isotope} - - def __setstate__(self, state): - self.__isotope = state['isotope'] - - @abstractmethod - def __hash__(self): - """ - Atom hash used in Morgan atom numbering algorithm. - """ - - @property - @abstractmethod - def atomic_symbol(self) -> str: - """ - Element symbol - """ - - @property - @abstractmethod - def atomic_number(self) -> int: - """ - Element number - """ - - @property - def isotope(self) -> Optional[int]: - """ - Isotope number - """ - return self.__isotope - - @property - def charge(self) -> int: - """ - Charge of atom - """ - try: - return self._graph()._charges[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def is_radical(self) -> bool: - """ - Radical state of atoms - """ - try: - return self._graph()._radicals[self._n] - except AttributeError: - raise IsNotConnectedAtom - - def copy(self: T) -> T: - """ - Detached from graph copy of element - """ - copy = object.__new__(self.__class__) - copy._Core__isotope = self.__isotope - return copy - - def _attach_graph(self, graph, n): - try: - self._graph - except AttributeError: - self._graph = ref(graph) - self._n = n - else: - raise IsConnectedAtom - - def _change_map(self, n): - try: - self._graph - except AttributeError: - raise IsNotConnectedAtom - else: - self._n = n - - -__all__ = ['Core'] diff --git a/chython/periodictable/element/query.py b/chython/periodictable/element/query.py deleted file mode 100644 index 94b9edca..00000000 --- 
a/chython/periodictable/element/query.py +++ /dev/null @@ -1,318 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020-2024 Ramil Nugmanov -# Copyright 2021 Dmitrij Zanadvornykh -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from abc import ABC -from typing import Tuple, Type, List, Union -from .core import Core -from .element import Element -from ...exceptions import IsNotConnectedAtom - - -_inorganic = {'He', 'Ne', 'Ar', 'Kr', 'Xe', 'F', 'Cl', 'Br', 'I', 'B', 'C', 'N', 'O', - 'H', 'Si', 'P', 'S', 'Se', 'Ge', 'As', 'Sb', 'Te', 'At'} - - -class Query(Core, ABC): - __slots__ = () - - @property - def neighbors(self) -> Tuple[int, ...]: - try: - return self._graph()._neighbors[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def hybridization(self): - try: - return self._graph()._hybridizations[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def heteroatoms(self) -> Tuple[int, ...]: - try: - return self._graph()._heteroatoms[self._n] - except AttributeError: - raise IsNotConnectedAtom - - @property - def ring_sizes(self) -> Tuple[int, ...]: - """ - Atom rings sizes. 
- """ - try: - return self._graph()._rings_sizes[self._n] - except AttributeError: - raise IsNotConnectedAtom - except KeyError: - return () - - @property - def implicit_hydrogens(self) -> Tuple[int, ...]: - try: - return self._graph()._hydrogens[self._n] - except AttributeError: - raise IsNotConnectedAtom - - -class QueryElement(Query, ABC): - __slots__ = () - - @property - def atomic_symbol(self) -> str: - return self.__class__.__name__[5:] - - @classmethod - def from_symbol(cls, symbol: str) -> Type[Union['QueryElement', 'AnyElement', 'AnyMetal']]: - """ - get Element class by its symbol - """ - if symbol == 'A': - return AnyElement - elif symbol == 'M': - return AnyMetal - try: - element = next(x for x in QueryElement.__subclasses__() if x.__name__ == f'Query{symbol}') - except StopIteration: - raise ValueError(f'QueryElement with symbol "{symbol}" not found') - return element - - @classmethod - def from_atomic_number(cls, number: int) -> Type['QueryElement']: - """ - get Element class by its number - """ - try: - element = next(x for x in QueryElement.__subclasses__() if x.atomic_number.fget(None) == number) - except StopIteration: - raise ValueError(f'QueryElement with number "{number}" not found') - return element - - @classmethod - def from_atom(cls, atom: Union['Element', 'Query']) -> 'Query': - """ - get QueryElement or AnyElement object from Element object or copy of QueryElement or AnyElement - """ - if isinstance(atom, Element): - return cls.from_atomic_number(atom.atomic_number)(atom.isotope) - elif not isinstance(atom, Query): - raise TypeError('Element or Query expected') - return atom.copy() - - def __eq__(self, other): - """ - compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if self.atomic_number == other.atomic_number and self.charge == other.charge and \ - self.is_radical == other.is_radical: - if self.isotope and self.isotope != other.isotope: - return False - if self.neighbors and 
other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, QueryElement) and self.atomic_number == other.atomic_number and \ - self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - # equal query element has equal query marks - return True - return False - - def __hash__(self): - return hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical, self.neighbors, - self.hybridization, self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - - -class AnyElement(Query): - __slots__ = () - - def __init__(self, *args, **kwargs): - super().__init__() - - @property - def atomic_symbol(self) -> str: - return 'A' - - @property - def atomic_number(self) -> int: - return 0 - - def __eq__(self, other): - """ - Compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if self.charge == other.charge and self.is_radical == other.is_radical: - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in 
ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, AnyMetal): - return False - elif isinstance(other, Query) and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - return True - return False - - def __hash__(self): - return hash((self.charge, self.is_radical, self.neighbors, self.hybridization, self.ring_sizes, - self.implicit_hydrogens, self.heteroatoms)) - - -class AnyMetal(Query): - """ - Charge and radical ignored any metal. Rings, hydrogens and heteroatoms count also ignored. - - Class designed for d-elements matching in standardization. 
- """ - def __init__(self, *args, **kwargs): - super().__init__() - - @property - def atomic_symbol(self) -> str: - return 'M' - - @property - def atomic_number(self) -> int: - return 0 - - def __eq__(self, other): - if isinstance(other, Element): - if other.atomic_symbol not in _inorganic: - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - return True - elif isinstance(other, AnyMetal) and self.neighbors == other.neighbors \ - and self.hybridization == other.hybridization: - return True - return False - - def __hash__(self): - return hash((self.neighbors, self.hybridization)) - - -class ListElement(Query): - __slots__ = ('_elements', '_numbers') - - def __init__(self, elements: List[str], *args, **kwargs): - """ - Elements list - """ - super().__init__() - self._elements = tuple(elements) - self._numbers = tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() if x.__name__ in elements) - - @property - def atomic_symbol(self) -> str: - return ','.join(self._elements) - - @property - def atomic_number(self) -> int: - return 0 - - def copy(self): - copy = super().copy() - copy._elements = self._elements - copy._numbers = self._numbers - return copy - - def __eq__(self, other): - """ - Compare attached to molecules elements and query elements - """ - if isinstance(other, Element): - if other.atomic_number in self._numbers: - if self.charge != other.charge or self.is_radical != other.is_radical: - return False - if self.neighbors and other.neighbors not in self.neighbors: - return False - if self.hybridization and other.hybridization not in self.hybridization: - return False - if self.ring_sizes: - if self.ring_sizes[0]: - if set(self.ring_sizes).isdisjoint(other.ring_sizes): - return False - elif other.ring_sizes: # not in ring expected - return False - if self.implicit_hydrogens and other.implicit_hydrogens not in 
self.implicit_hydrogens: - return False - if self.heteroatoms and other.heteroatoms not in self.heteroatoms: - return False - return True - elif isinstance(other, (AnyElement, AnyMetal)): - return False - elif isinstance(other, Query) and self.charge == other.charge and self.is_radical == other.is_radical \ - and self.neighbors == other.neighbors and self.hybridization == other.hybridization \ - and self.ring_sizes == other.ring_sizes and self.implicit_hydrogens == other.implicit_hydrogens \ - and self.heteroatoms == other.heteroatoms: - if isinstance(other, ListElement): - return self._numbers == other._numbers - return other.atomic_number in self._numbers - return False - - def __hash__(self): - return hash((self._numbers, self.charge, self.is_radical, self.neighbors, self.hybridization, - self.ring_sizes, self.implicit_hydrogens, self.heteroatoms)) - - def __getstate__(self): - state = super().__getstate__() - state['elements'] = self._elements - return state - - def __setstate__(self, state): - self._elements = state['elements'] - self._numbers = tuple(x.atomic_number.fget(None) for x in Element.__subclasses__() - if x.__name__ in state['elements']) - super().__setstate__(state) - - def __repr__(self): - return f'{self.__class__.__name__}([{",".join(self._elements)}])' - - -__all__ = ['Query', 'QueryElement', 'AnyElement', 'AnyMetal', 'ListElement'] diff --git a/chython/periodictable/groupI.py b/chython/periodictable/groupI.py index 9b06949d..df3631f2 100644 --- a/chython/periodictable/groupI.py +++ b/chython/periodictable/groupI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -16,9 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupI -from .periods import * +from .base import Element +from .base.groups import GroupI +from .base.periods import * class H(Element, PeriodI, GroupI): @@ -48,6 +48,14 @@ def _valences_exceptions(self): def atomic_radius(self): return 0.53 + @property + def mdl_isotope(self): + return 1 + + @property + def is_forming_single_bonds(self): + return True + class Li(Element, PeriodII, GroupI): __slots__ = () @@ -76,6 +84,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 167 + @property + def mdl_isotope(self): + return 7 + class Na(Element, PeriodIII, GroupI): __slots__ = () @@ -104,6 +116,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.9 + @property + def mdl_isotope(self): + return 23 + class K(Element, PeriodIV, GroupI): __slots__ = () @@ -132,6 +148,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.43 + @property + def mdl_isotope(self): + return 39 + class Rb(Element, PeriodV, GroupI): __slots__ = () @@ -160,6 +180,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.65 + @property + def mdl_isotope(self): + return 85 + class Cs(Element, PeriodVI, GroupI): __slots__ = () @@ -188,6 +212,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.98 + @property + def mdl_isotope(self): + return 133 + class Fr(Element, PeriodVII, GroupI): __slots__ = () @@ -216,5 +244,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.98 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 223 + __all__ = ['H', 'Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'] diff --git a/chython/periodictable/groupII.py b/chython/periodictable/groupII.py index 0df4a674..8b6337d0 100644 --- a/chython/periodictable/groupII.py +++ b/chython/periodictable/groupII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 
Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class Be(Element, PeriodII, GroupII): @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.12 + @property + def mdl_isotope(self): + return 9 + class Mg(Element, PeriodIII, GroupII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 24 + class Ca(Element, PeriodIV, GroupII): __slots__ = () @@ -110,6 +118,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.94 + @property + def mdl_isotope(self): + return 40 + class Sr(Element, PeriodV, GroupII): __slots__ = () @@ -138,6 +150,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.19 + @property + def mdl_isotope(self): + return 88 + class Ba(Element, PeriodVI, GroupII): __slots__ = () @@ -167,6 +183,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.53 + @property + def mdl_isotope(self): + return 137 + class Ra(Element, PeriodVII, GroupII): __slots__ = () @@ -195,5 +215,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.53 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 226 + __all__ = ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'] diff --git a/chython/periodictable/groupIII.py b/chython/periodictable/groupIII.py index 60c57630..ca11c5f1 100644 --- a/chython/periodictable/groupIII.py +++ b/chython/periodictable/groupIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# 
Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupIII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Sc(Element, PeriodIV, GroupIII): @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.84 + @property + def mdl_isotope(self): + return 45 + class Y(Element, PeriodV, GroupIII): __slots__ = () @@ -77,6 +81,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 + @property + def mdl_isotope(self): + return 89 + class La(Element, PeriodVI, GroupIII): __slots__ = () @@ -105,6 +113,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 139 + class Ce(Element, PeriodVI, GroupIII): __slots__ = () @@ -137,6 +149,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.12 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 140 + class Pr(Element, PeriodVI, GroupIII): __slots__ = () @@ -167,6 +183,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.47 + @property + def mdl_isotope(self): + return 141 + class Nd(Element, PeriodVI, GroupIII): __slots__ = () @@ -208,6 +228,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.06 + @property + def mdl_isotope(self): + return 144 + class Pm(Element, PeriodVI, GroupIII): __slots__ = () @@ -236,6 +260,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.05 + @property + def mdl_isotope(self): + return 145 + class Sm(Element, PeriodVI, GroupIII): __slots__ = () @@ 
-277,6 +305,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.38 + @property + def mdl_isotope(self): + return 150 + class Eu(Element, PeriodVI, GroupIII): __slots__ = () @@ -316,6 +348,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.31 + @property + def mdl_isotope(self): + return 152 + class Gd(Element, PeriodVI, GroupIII): __slots__ = () @@ -345,6 +381,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.33 + @property + def mdl_isotope(self): + return 157 + class Tb(Element, PeriodVI, GroupIII): __slots__ = () @@ -375,6 +415,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.25 + @property + def mdl_isotope(self): + return 159 + class Dy(Element, PeriodVI, GroupIII): __slots__ = () @@ -406,6 +450,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.28 + @property + def mdl_isotope(self): + return 163 + class Ho(Element, PeriodVI, GroupIII): __slots__ = () @@ -445,6 +493,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.26 + @property + def mdl_isotope(self): + return 165 + class Er(Element, PeriodVI, GroupIII): __slots__ = () @@ -473,6 +525,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.26 + @property + def mdl_isotope(self): + return 167 + class Tm(Element, PeriodVI, GroupIII): __slots__ = () @@ -512,6 +568,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.22 + @property + def mdl_isotope(self): + return 169 + class Yb(Element, PeriodVI, GroupIII): __slots__ = () @@ -552,6 +612,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.22 + @property + def mdl_isotope(self): + return 173 + class Lu(Element, PeriodVI, GroupIII): __slots__ = () @@ -580,6 +644,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 + @property + def mdl_isotope(self): + return 175 + class Ac(Element, PeriodVII, GroupIII): __slots__ = () @@ -608,6 +676,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 227 + class Th(Element, PeriodVII, GroupIII): __slots__ = () @@ -641,6 +713,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 232 + class Pa(Element, PeriodVII, GroupIII): __slots__ = () @@ -671,6 +747,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 231 + class U(Element, PeriodVII, GroupIII): __slots__ = () @@ -700,6 +780,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 238 + class Np(Element, PeriodVII, GroupIII): __slots__ = () @@ -730,6 +814,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 237 + class Pu(Element, PeriodVII, GroupIII): __slots__ = () @@ -768,6 +856,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 244 + class Am(Element, PeriodVII, GroupIII): __slots__ = () @@ -796,6 +888,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 243 + class Cm(Element, PeriodVII, GroupIII): __slots__ = () @@ -824,6 +920,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 247 + class Bk(Element, PeriodVII, GroupIII): __slots__ = () @@ -852,6 +952,10 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 247 + class Cf(Element, PeriodVII, GroupIII): __slots__ = () @@ -880,6 +984,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 251 + class Es(Element, PeriodVII, GroupIII): __slots__ = () @@ -908,6 +1016,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 252 + class Fm(Element, PeriodVII, GroupIII): __slots__ = () @@ -936,6 +1048,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 257 + class Md(Element, PeriodVII, GroupIII): __slots__ = () @@ -964,6 +1080,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 258 + class No(Element, PeriodVII, GroupIII): __slots__ = () @@ -992,6 +1112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 259 + class Lr(Element, PeriodVII, GroupIII): __slots__ = () @@ -1020,6 +1144,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.17 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 260 + __all__ = ['Sc', 'Y', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', diff --git a/chython/periodictable/groupIV.py b/chython/periodictable/groupIV.py index cc22146a..70c626b8 100644 --- a/chython/periodictable/groupIV.py +++ b/chython/periodictable/groupIV.py @@ -1,6 +1,6 @@ # 
-*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupIV -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIV +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Ti(Element, PeriodIV, GroupIV): @@ -80,6 +80,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.76 + @property + def mdl_isotope(self): + return 48 + class Zr(Element, PeriodV, GroupIV): __slots__ = () @@ -127,6 +131,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.06 + @property + def mdl_isotope(self): + return 91 + class Hf(Element, PeriodVI, GroupIV): __slots__ = () @@ -162,6 +170,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.08 + @property + def mdl_isotope(self): + return 178 + class Rf(Element, PeriodVII, GroupIV): __slots__ = () @@ -190,5 +202,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.08 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 261 + __all__ = ['Ti', 'Zr', 'Hf', 'Rf'] diff --git a/chython/periodictable/groupIX.py b/chython/periodictable/groupIX.py index 6cf22449..b1fe8055 100644 --- a/chython/periodictable/groupIX.py +++ b/chython/periodictable/groupIX.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupIX -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupIX +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Co(Element, PeriodIV, GroupIX): @@ -71,6 +71,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.52 + @property + def mdl_isotope(self): + return 59 + class Rh(Element, PeriodV, GroupIX): __slots__ = () @@ -108,6 +112,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.73 + @property + def mdl_isotope(self): + return 103 + class Ir(Element, PeriodVI, GroupIX): __slots__ = () @@ -148,6 +156,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.8 + @property + def mdl_isotope(self): + return 192 + class Mt(Element, PeriodVII, GroupIX): __slots__ = () @@ -176,5 +188,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.8 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 278 + __all__ = ['Co', 'Rh', 'Ir', 'Mt'] diff --git a/chython/periodictable/groupV.py b/chython/periodictable/groupV.py index e923cec1..67e56d7d 100644 --- a/chython/periodictable/groupV.py +++ b/chython/periodictable/groupV.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # Copyright 2019 Tagir Akhmetshin # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupV -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupV +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class V(Element, PeriodIV, GroupV): @@ -68,6 +68,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 + @property + def mdl_isotope(self): + return 51 + class Nb(Element, PeriodV, GroupV): __slots__ = () @@ -111,6 +115,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.98 + @property + def mdl_isotope(self): + return 93 + class Ta(Element, PeriodVI, GroupV): __slots__ = () @@ -144,6 +152,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.0 + @property + def mdl_isotope(self): + return 181 + class Db(Element, PeriodVII, GroupV): __slots__ = () @@ -172,5 +184,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 2.0 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['V', 'Nb', 'Ta', 'Db'] diff --git a/chython/periodictable/groupVI.py b/chython/periodictable/groupVI.py index 6fa24b94..0511d734 100644 --- a/chython/periodictable/groupVI.py +++ b/chython/periodictable/groupVI.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Dayana Bashirova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVI -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVI +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Cr(Element, PeriodIV, GroupVI): @@ -59,6 +59,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.66 + @property + def mdl_isotope(self): + return 52 + class Mo(Element, PeriodV, GroupVI): __slots__ = () @@ -102,6 +106,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.90 + @property + def mdl_isotope(self): + return 96 + class W(Element, PeriodVI, GroupVI): __slots__ = () @@ -135,6 +143,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.93 + @property + def mdl_isotope(self): + return 184 + class Sg(Element, PeriodVII, GroupVI): __slots__ = () @@ -163,5 +175,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.93 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 269 + __all__ = ['Cr', 'Mo', 'W', 'Sg'] diff --git a/chython/periodictable/groupVII.py b/chython/periodictable/groupVII.py index c66e89d9..f754b97e 100644 --- a/chython/periodictable/groupVII.py +++ b/chython/periodictable/groupVII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Alexander Nikanshin <17071996sasha@gmail.com> # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Mn(Element, PeriodIV, GroupVII): @@ -57,6 +57,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.61 + @property + def mdl_isotope(self): + return 55 + class Tc(Element, PeriodV, GroupVII): __slots__ = () @@ -86,6 +90,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.83 + @property + def mdl_isotope(self): + return 98 + class Re(Element, PeriodVI, GroupVII): __slots__ = () @@ -114,6 +122,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.88 + @property + def mdl_isotope(self): + return 186 + class Bh(Element, PeriodVII, GroupVII): __slots__ = () @@ -142,5 +154,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.88 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['Mn', 'Tc', 'Re', 'Bh'] diff --git a/chython/periodictable/groupVIII.py b/chython/periodictable/groupVIII.py index 3d88324b..15056c3f 100644 --- a/chython/periodictable/groupVIII.py +++ b/chython/periodictable/groupVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupVIII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupVIII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Fe(Element, PeriodIV, GroupVIII): @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 56 + class Ru(Element, PeriodV, GroupVIII): __slots__ = () @@ -81,6 +85,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.78 + @property + def mdl_isotope(self): + return 101 + class Os(Element, PeriodVI, GroupVIII): __slots__ = () @@ -113,6 +121,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.85 + @property + def mdl_isotope(self): + return 190 + class Hs(Element, PeriodVII, GroupVIII): __slots__ = () @@ -141,5 +153,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.85 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 270 + __all__ = ['Fe', 'Ru', 'Os', 'Hs'] diff --git a/chython/periodictable/groupX.py b/chython/periodictable/groupX.py index 80a499a4..8c8b2c08 100644 --- a/chython/periodictable/groupX.py +++ b/chython/periodictable/groupX.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupX -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupX +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Ni(Element, PeriodIV, GroupX): @@ -52,6 +52,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.49 + @property + def mdl_isotope(self): + return 59 + class Pd(Element, PeriodV, GroupX): __slots__ = () @@ -85,6 +89,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.69 + @property + def mdl_isotope(self): + return 106 + class Pt(Element, PeriodVI, GroupX): __slots__ = () @@ -118,6 +126,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.77 + @property + def mdl_isotope(self): + return 195 + class Ds(Element, PeriodVII, GroupX): __slots__ = () @@ -146,5 +158,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.77 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 281 + __all__ = ['Ni', 'Pd', 'Pt', 'Ds'] diff --git a/chython/periodictable/groupXI.py b/chython/periodictable/groupXI.py index 40bc7c91..1c80d3d5 100644 --- a/chython/periodictable/groupXI.py +++ b/chython/periodictable/groupXI.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXI -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXI +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Cu(Element, PeriodIV, GroupXI): @@ -52,6 +52,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 64 + class Ag(Element, PeriodV, GroupXI): __slots__ = () @@ -84,6 +88,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.65 + @property + def mdl_isotope(self): + return 108 + class Au(Element, PeriodVI, GroupXI): __slots__ = () @@ -116,6 +124,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.74 + @property + def mdl_isotope(self): + return 197 + class Rg(Element, PeriodVII, GroupXI): __slots__ = () @@ -144,5 +156,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.74 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 281 + __all__ = ['Cu', 'Ag', 'Au', 'Rg'] diff --git a/chython/periodictable/groupXII.py b/chython/periodictable/groupXII.py index 7b48dfad..2b59c90b 100644 --- a/chython/periodictable/groupXII.py +++ b/chython/periodictable/groupXII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Dayana Bashirova # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXII -from .periods import PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXII +from .base.periods import PeriodIV, PeriodV, PeriodVI, PeriodVII class Zn(Element, PeriodIV, GroupXII): @@ -50,6 +50,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.42 + @property + def mdl_isotope(self): + return 65 + class Cd(Element, PeriodV, GroupXII): __slots__ = () @@ -80,6 +84,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.61 + @property + def mdl_isotope(self): + return 112 + class Hg(Element, PeriodVI, GroupXII): __slots__ = () @@ -110,6 +118,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 + @property + def mdl_isotope(self): + return 201 + class Cn(Element, PeriodVII, GroupXII): __slots__ = () @@ -138,5 +150,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.71 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 285 + __all__ = ['Zn', 'Cd', 'Hg', 'Cn'] diff --git a/chython/periodictable/groupXIII.py b/chython/periodictable/groupXIII.py index dd5d728c..e89d745f 100644 --- a/chython/periodictable/groupXIII.py +++ b/chython/periodictable/groupXIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2023 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # Copyright 2019 Tansu Nasyrova # This file is part of chython. @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXIII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXIII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class B(Element, PeriodII, GroupXIII): @@ -51,6 +51,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 11 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Al(Element, PeriodIII, GroupXIII): __slots__ = () @@ -81,6 +93,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.18 + @property + def mdl_isotope(self): + return 27 + class Ga(Element, PeriodIV, GroupXIII): __slots__ = () @@ -115,6 +131,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.36 + @property + def mdl_isotope(self): + return 70 + class In(Element, PeriodV, GroupXIII): __slots__ = () @@ -145,6 +165,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 115 + class Tl(Element, PeriodVI, GroupXIII): __slots__ = () @@ -175,6 +199,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 + @property + def mdl_isotope(self): + return 204 + class Nh(Element, PeriodVII, GroupXIII): __slots__ = () @@ -203,5 +231,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.56 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 278 + __all__ = ['B', 'Al', 'Ga', 'In', 'Tl', 'Nh'] diff --git a/chython/periodictable/groupXIV.py b/chython/periodictable/groupXIV.py index ae2be925..43cca943 100644 --- a/chython/periodictable/groupXIV.py +++ b/chython/periodictable/groupXIV.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if 
not, see . # -from .element import Element -from .groups import GroupXIV -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXIV +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class C(Element, PeriodII, GroupXIV): @@ -50,6 +50,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .67 + @property + def mdl_isotope(self): + return 12 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Si(Element, PeriodIII, GroupXIV): __slots__ = () @@ -78,6 +90,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.11 + @property + def mdl_isotope(self): + return 28 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Ge(Element, PeriodIV, GroupXIV): __slots__ = () @@ -106,6 +130,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.25 + @property + def mdl_isotope(self): + return 73 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Sn(Element, PeriodV, GroupXIV): __slots__ = () @@ -144,6 +180,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.45 + @property + def mdl_isotope(self): + return 119 + class Pb(Element, PeriodVI, GroupXIV): __slots__ = () @@ -182,6 +222,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.54 + @property + def mdl_isotope(self): + return 207 + class Fl(Element, PeriodVII, GroupXIV): __slots__ = () @@ -210,5 +254,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.54 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 289 + __all__ = ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'] diff --git a/chython/periodictable/groupXV.py 
b/chython/periodictable/groupXV.py index 52f9b545..5f031016 100644 --- a/chython/periodictable/groupXV.py +++ b/chython/periodictable/groupXV.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXV -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXV +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class N(Element, PeriodII, GroupXV): @@ -51,6 +51,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .56 + @property + def mdl_isotope(self): + return 14 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class P(Element, PeriodIII, GroupXV): __slots__ = () @@ -86,6 +98,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .98 + @property + def mdl_isotope(self): + return 31 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class As(Element, PeriodIV, GroupXV): __slots__ = () @@ -114,6 +138,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.14 + @property + def mdl_isotope(self): + return 75 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Sb(Element, PeriodV, GroupXV): __slots__ = () @@ -143,6 +179,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.33 + @property + def mdl_isotope(self): + return 122 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Bi(Element, PeriodVI, GroupXV): __slots__ = () @@ -188,6 +236,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.43 + @property 
+ def mdl_isotope(self): + return 209 + class Mc(Element, PeriodVII, GroupXV): __slots__ = () @@ -216,5 +268,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.43 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 289 + __all__ = ['N', 'P', 'As', 'Sb', 'Bi', 'Mc'] diff --git a/chython/periodictable/groupXVI.py b/chython/periodictable/groupXVI.py index fd060971..0c782531 100644 --- a/chython/periodictable/groupXVI.py +++ b/chython/periodictable/groupXVI.py @@ -19,9 +19,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXVI -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXVI +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class O(Element, PeriodII, GroupXVI): @@ -51,6 +51,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .48 + @property + def mdl_isotope(self): + return 16 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class S(Element, PeriodIII, GroupXVI): __slots__ = () @@ -227,6 +239,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 32 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Se(Element, PeriodIV, GroupXVI): __slots__ = () @@ -286,6 +310,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.03 + @property + def mdl_isotope(self): + return 79 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Te(Element, PeriodV, GroupXVI): __slots__ = () @@ -336,6 +372,18 @@ def 
_valences_exceptions(self): def atomic_radius(self): return 1.23 + @property + def mdl_isotope(self): + return 128 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Po(Element, PeriodVI, GroupXVI): __slots__ = () @@ -369,6 +417,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.35 + @property + def mdl_isotope(self): + return 209 + class Lv(Element, PeriodVII, GroupXVI): __slots__ = () @@ -397,5 +449,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.35 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 293 + __all__ = ['O', 'S', 'Se', 'Te', 'Po', 'Lv'] diff --git a/chython/periodictable/groupXVII.py b/chython/periodictable/groupXVII.py index 064722c2..3be4f6a7 100644 --- a/chython/periodictable/groupXVII.py +++ b/chython/periodictable/groupXVII.py @@ -18,9 +18,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . 
# -from .element import Element -from .groups import GroupXVII -from .periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII +from .base import Element +from .base.groups import GroupXVII +from .base.periods import PeriodII, PeriodIII, PeriodIV, PeriodV, PeriodVI, PeriodVII class F(Element, PeriodII, GroupXVII): @@ -50,6 +50,14 @@ def _valences_exceptions(self): def atomic_radius(self): return .42 + @property + def mdl_isotope(self): + return 19 + + @property + def is_forming_single_bonds(self): + return True + class Cl(Element, PeriodIII, GroupXVII): __slots__ = () @@ -89,6 +97,18 @@ def _valences_exceptions(self): def atomic_radius(self): return .79 + @property + def mdl_isotope(self): + return 35 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Br(Element, PeriodIV, GroupXVII): __slots__ = () @@ -135,6 +155,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 0.94 + @property + def mdl_isotope(self): + return 80 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class I(Element, PeriodV, GroupXVII): __slots__ = () @@ -203,6 +235,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.15 + @property + def mdl_isotope(self): + return 127 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class At(Element, PeriodVI, GroupXVII): __slots__ = () @@ -232,6 +276,18 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.27 + @property + def mdl_isotope(self): + return 210 + + @property + def is_forming_single_bonds(self): + return True + + @property + def is_forming_double_bonds(self): + return True + class Ts(Element, PeriodVII, GroupXVII): __slots__ = () @@ -260,5 +316,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.27 # unknown, 
taken radius of previous element in group + @property + def mdl_isotope(self): + return 297 + __all__ = ['F', 'Cl', 'Br', 'I', 'At', 'Ts'] diff --git a/chython/periodictable/groupXVIII.py b/chython/periodictable/groupXVIII.py index 692fd9b4..b8137593 100644 --- a/chython/periodictable/groupXVIII.py +++ b/chython/periodictable/groupXVIII.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2021 Ramil Nugmanov +# Copyright 2019-2024 Ramil Nugmanov # Copyright 2019 Tagir Akhmetshin # This file is part of chython. # @@ -17,9 +17,9 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from .element import Element -from .groups import GroupXVIII -from .periods import * +from .base import Element +from .base.groups import GroupXVIII +from .base.periods import * class He(Element, PeriodI, GroupXVIII): @@ -49,6 +49,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .31 + @property + def mdl_isotope(self): + return 4 + class Ne(Element, PeriodII, GroupXVIII): __slots__ = () @@ -77,6 +81,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .38 + @property + def mdl_isotope(self): + return 20 + class Ar(Element, PeriodIII, GroupXVIII): __slots__ = () @@ -105,6 +113,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .71 + @property + def mdl_isotope(self): + return 40 + class Kr(Element, PeriodIV, GroupXVIII): __slots__ = () @@ -133,6 +145,10 @@ def _valences_exceptions(self): def atomic_radius(self): return .87 + @property + def mdl_isotope(self): + return 84 + class Xe(Element, PeriodV, GroupXVIII): __slots__ = () @@ -172,6 +188,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.08 + @property + def mdl_isotope(self): + return 131 + class Rn(Element, PeriodVI, GroupXVIII): __slots__ = () @@ -200,6 +220,10 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.2 + @property + def mdl_isotope(self): + return 222 
+ class Og(Element, PeriodVII, GroupXVIII): __slots__ = () @@ -228,5 +252,9 @@ def _valences_exceptions(self): def atomic_radius(self): return 1.2 # unknown, taken radius of previous element in group + @property + def mdl_isotope(self): + return 294 + __all__ = ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', 'Og'] diff --git a/chython/reactor/base.py b/chython/reactor/base.py index 30212b08..65a5966e 100644 --- a/chython/reactor/base.py +++ b/chython/reactor/base.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2014-2023 Ramil Nugmanov +# Copyright 2014-2025 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. # @@ -17,249 +17,196 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from collections import defaultdict -from itertools import product +from typing import Union from ..containers import MoleculeContainer, QueryContainer from ..containers.bonds import Bond -from ..periodictable import Element, ListElement, AnyElement +from ..periodictable import Element, AnyElement, QueryElement class BaseReactor: - def __init__(self, reactants, products, delete_atoms, fix_rings, fix_tautomers): - self.__to_delete = reactants.difference(products) if delete_atoms else () + def __init__(self, pattern, replacement, delete_atoms, fix_rings, fix_tautomers): + if isinstance(replacement, QueryContainer): + for n, a in replacement.atoms(): + if not isinstance(a, (AnyElement, QueryElement)): + raise TypeError('Unsupported query atom type') + elif len(a.implicit_hydrogens) > 1: + raise ValueError('Query element in patch has more than one implicit hydrogen clause') + for *_, b in replacement.bonds(): + if len(b.order) > 1: + raise ValueError('Variable bond in replacement') - # prepare atoms patch - self.__elements = elements = {} - self.__hydrogens = hydrogens = {} - self.__variable = variable = [] + self._to_delete = {n for n, a in pattern.atoms() if not a.masked} - set(replacement) if 
delete_atoms else () + self._replacement = replacement + self._fix_rings = fix_rings + self._fix_tautomers = fix_tautomers - atoms = defaultdict(dict) - for n, atom in products.atoms(): - atoms[n].update(charge=atom.charge, is_radical=atom.is_radical) - if atom.atomic_number: # replace atom - elements[n] = Element.from_atomic_number(atom.atomic_number)(atom.isotope) - if n not in reactants and isinstance(products, MoleculeContainer): - atoms[n]['xy'] = atom.xy - if atom.implicit_hydrogens is not None: - hydrogens[n] = atom.implicit_hydrogens # save available H count - elif n not in reactants: - if not isinstance(atom, ListElement): - raise ValueError('New atom should be defined') - elements[n] = [Element.from_symbol(x)() for x in atom._elements] - variable.append(n) - else: # use atom from reactant - if not isinstance(atom, AnyElement): - raise ValueError('Only AnyElement can be used for matched atom propagation') - elements[n] = None + def _get_deleted(self, structure, mapping): + if not self._to_delete: + return set() - if isinstance(products, QueryContainer): - bonds = [] - for n, m, b in products.bonds(): - if len(b.order) > 1: - raise ValueError('bond list in patch not supported') + bonds = structure._bonds + to_delete = {mapping[x] for x in self._to_delete} + # if deleted atoms have another path to remain fragment, the path is preserved + remain = set(mapping.values()).difference(to_delete) + delete, global_seen = set(), set() + for x in to_delete: + for n in bonds[x]: + if n in global_seen or n in remain: + continue + seen = {n} + global_seen.add(n) + stack = [x for x in bonds[n] if x not in global_seen] + while stack: + current = stack.pop() + if current in remain: + break + if current in to_delete: + continue + seen.add(current) + global_seen.add(current) + stack.extend([x for x in bonds[current] if x not in global_seen]) else: - bonds.append((n, m, Bond(b.order[0]))) - else: - bonds = [(n, m, b.copy()) for n, m, b in products.bonds()] + 
delete.update(seen) - self.__bonds = bonds - self.__atom_attrs = dict(atoms) - self.__products = products - self.__fix_rings = fix_rings - self.__fix_tautomers = fix_tautomers + to_delete.update(delete) + return to_delete def _patcher(self, structure: MoleculeContainer, mapping): - elements = self.__elements - variable = self.__variable - - new = self.__prepare_skeleton(structure, mapping) - self.__set_stereo(new, structure, mapping) - - if not variable: - if self.__fix_rings: - new.kekule() # keeps stereo as is - if not new.thiele(fix_tautomers=self.__fix_tautomers): # fixes stereo if any ring aromatized - new.fix_stereo() - else: - new.fix_stereo() - yield new - else: - copy = new.copy() - if self.__fix_rings: - copy.kekule() - if not copy.thiele(fix_tautomers=self.__fix_tautomers): - copy.fix_stereo() - else: - copy.fix_stereo() - yield copy + satoms = structure._atoms + sbonds = structure._bonds - for atoms in product(*(elements[x][1:] for x in variable)): - copy = new.copy() - for n, atom in zip(variable, atoms): - n = mapping[n] - # replace atom - copy._atoms[n] = a = atom.copy() # noqa - a._attach_graph(copy, n) # noqa - copy._calc_implicit(n) # noqa - if self.__fix_rings: - copy.kekule() - if not copy.thiele(fix_tautomers=self.__fix_tautomers): - copy.fix_stereo() - else: - copy.fix_stereo() + to_delete = self._get_deleted(structure, mapping) + new = structure.__class__() + natoms = new._atoms + nbonds = new._bonds + max_atom = max(satoms) + stereo_atoms = [] + stereo_bonds = [] + + # let's preserve connectivity order from replacement to keep stereo signs as is. + # stereo labels from original structure will be recalculated after full molecule construction. 
+ for n, ra in self._replacement.atoms(): + if isinstance(ra, AnyElement): + if m := mapping.get(n): + # keep matched atom type and isotope + sa = satoms[m] + a = sa.copy() + a.charge = ra.charge + a.is_radical = ra.is_radical + if ra.stereo is not None: # override stereo + a._stereo = ra.stereo + elif sa.stereo is not None: # keep original stereo + stereo_atoms.append(m) # mark for stereo fix else: - copy.fix_stereo() - yield copy - - def __prepare_skeleton(self, structure, mapping): - elements = self.__elements - patch_hydrogens = self.__hydrogens - patch_bonds = self.__bonds - variable = self.__variable - - atoms = structure._atoms - plane = structure._plane - bonds = structure._bonds - charges = structure._charges - radicals = structure._radicals - hydrogens = structure._hydrogens - - to_delete = {mapping[x] for x in self.__to_delete} - if to_delete: - # if deleted atoms have another path to remain fragment, the path is preserved - remain = set(mapping.values()).difference(to_delete) - delete, global_seen = set(), set() - for x in to_delete: - for n in bonds[x]: - if n in global_seen or n in remain: + raise ValueError("AnyElement doesn't match to pattern") + else: # QueryElement or Element + ra: Union[QueryElement, Element] # typehint + e = Element.from_atomic_number(ra.atomic_number) + a = e(ra.isotope, charge=ra.charge, is_radical=ra.is_radical) + if not (m := mapping.get(n)): # new atom + m = max_atom + 1 + mapping[n] = max_atom = m + a._stereo = ra.stereo # keep stereo from patch for new atoms + if isinstance(ra, Element): + a._implicit_hydrogens = ra.implicit_hydrogens # keep H count from patch + a.xy = ra.xy # keep coordinates from patch + elif ra.implicit_hydrogens: # keep H count from patch + a._implicit_hydrogens = ra.implicit_hydrogens[0] + else: # existing atoms + sa = satoms[m] + a.xy = sa.xy # preserve existing coordinates + if ra.stereo is not None: + a._stereo = ra.stereo + elif sa.stereo is not None: # keep original stereo + 
stereo_atoms.append(m) + natoms[m] = a + nbonds[m] = {} + + # preserve connectivity order + for n, bs in self._replacement._bonds.items(): + n = mapping[n] + for m, rb in bs.items(): + m = mapping[m] + if n in nbonds[m]: # back-link + nbonds[n][m] = nbonds[m][n] + else: + nbonds[n][m] = b = Bond(int(rb)) + if rb.stereo is not None: # override stereo + b._stereo = rb.stereo + # check bond exists in source and has stereo label and the same order + elif (sbn := sbonds.get(n)) is None or (sb := sbn.get(m)) is None or sb.stereo is None or sb != b: continue - seen = {n} - global_seen.add(n) - stack = [x for x in bonds[n] if x not in global_seen] - while stack: - current = stack.pop() - if current in remain: - break - if current in to_delete: - continue - seen.add(current) - global_seen.add(current) - stack.extend([x for x in bonds[current] if x not in global_seen]) + else: # original structure has stereo bond + stereo_bonds.append((n, m)) + + patched_atoms = set(new) + for n, sa in satoms.items(): # add unmatched or masked atoms + if n not in patched_atoms and n not in to_delete: + natoms[n] = a = sa.copy(hydrogens=True) + nbonds[n] = {} + if sa.stereo is not None: + # in case of allenes label can disappear/change, thus, requires recalculation + # for tetrahedrons label can be stored as is + if n in structure.stereogenic_tetrahedrons: + a._stereo = sa.stereo else: - delete.update(seen) - - to_delete.update(delete) + stereo_atoms.append(n) - new = structure.__class__() - keep_hydrogens = {} - max_atom = max(atoms) - for n, atom in self.__atom_attrs.items(): - if n in mapping: # add matched atoms - m = mapping[n] - e = elements[n] - if e is None: - e = atoms[m] - new.add_atom(e.copy(), m, xy=plane[m], _skip_hydrogen_calculation=True, **atom) - else: # new atoms - max_atom += 1 - if n in variable: - # use first from the list - mapping[n] = new.add_atom(elements[n][0].copy(), max_atom, _skip_hydrogen_calculation=True, **atom) - else: - mapping[n] = 
new.add_atom(elements[n].copy(), max_atom, _skip_hydrogen_calculation=True, **atom) - if n in patch_hydrogens: # keep patch aromatic atoms hydrogens count - keep_hydrogens[max_atom] = patch_hydrogens[n] - - patch_atoms = set(new) # don't move! - for n, atom in structure.atoms(): # add unmatched atoms - if n not in patch_atoms and n not in to_delete: - new.add_atom(atom.copy(), n, charge=charges[n], is_radical=radicals[n], xy=plane[n], - _skip_hydrogen_calculation=True) - keep_hydrogens[n] = hydrogens[n] # keep hydrogens on unmatched atoms as is. - - for n, m, bond in patch_bonds: # add patch bonds - new.add_bond(mapping[n], mapping[m], bond.copy(), _skip_hydrogen_calculation=True) - - for n, m_bond in bonds.items(): + for n, bs in sbonds.items(): # preserve connectivity order for keeping stereo labels as is if n in to_delete: # atoms for removing continue - to_delete.add(n) # reuse to_delete set for seen atoms - for m, bond in m_bond.items(): + for m, b in bs.items(): # ignore deleted atoms and patch atoms - if m in to_delete or n in patch_atoms and m in patch_atoms: + if m in to_delete or n in patched_atoms and m in patched_atoms: continue - new.add_bond(n, m, bond.copy(), _skip_hydrogen_calculation=True) - - # fix hydrogens count. 
- new._hydrogens.update(keep_hydrogens) # noqa - for n in new: - if n not in keep_hydrogens: - new._calc_implicit(n) # noqa - return new - - def __set_stereo(self, new, structure, mapping): - products = self.__products - stereo_override = set() - r_mapping = {m: n for n, m in mapping.items()} - - # set patch atoms stereo - for n, s in products._atoms_stereo.items(): - m = mapping[n] - new._atoms_stereo[m] = products._translate_tetrahedron_sign(n, [r_mapping[x] for x in - new._stereo_tetrahedrons[m]], s) - stereo_override.add(m) - - for n, s in products._allenes_stereo.items(): - m = mapping[n] - t1, t2, *_ = new._stereo_allenes[m] - new._allenes_stereo[m] = products._translate_allene_sign(n, r_mapping[t1], r_mapping[t2], s) - stereo_override.add(m) - - for (n, m), s in products._cis_trans_stereo.items(): - nm = (mapping[n], mapping[m]) - try: - t1, t2, *_ = new._stereo_cis_trans[nm] - except KeyError: - nm = nm[::-1] - t2, t1, *_ = new._stereo_cis_trans[nm] - new._cis_trans_stereo[nm] = products._translate_cis_trans_sign(n, m, r_mapping[t1], r_mapping[t2], s) - stereo_override.update(nm) - - # set unmatched part stereo and not overridden by patch. - for n, s in structure._atoms_stereo.items(): - if n in stereo_override or n not in new._stereo_tetrahedrons or \ - new._bonds[n].keys() != structure._bonds[n].keys(): - # skip atoms with changed neighbors + elif n in nbonds[m]: # back-link + nbonds[n][m] = nbonds[m][n] + else: + nbonds[n][m] = b.copy() + if b.stereo is not None: + # stereo label should be recalculated + stereo_bonds.append((n, m)) + + for n, a in new.atoms(): + if a.implicit_hydrogens is None: + new.calc_implicit(n) + new.calc_labels() + + # translate stereo sign from old order to new order + for n in stereo_atoms: + if n in new.stereogenic_tetrahedrons: + if sbonds[n].keys() == nbonds[n].keys(): + # flush stereo from reaction center. should be explicitly set in replacement. 
+ s = new._translate_tetrahedron_sign(n, structure.stereogenic_tetrahedrons[n], satoms[n].stereo) + natoms[n]._stereo = s + elif n in new.stereogenic_allenes: + if set(new.stereogenic_allenes[n]) == set(structure.stereogenic_allenes[n]): + # flush stereo for changed allene substituents + s = new._translate_allene_sign(n, *structure.stereogenic_allenes[n][:2], satoms[n].stereo) + natoms[n]._stereo = s + # else: ignore label + + for n, m in stereo_bonds: + # check if bond is center of cumulene + if (n12 := new._stereo_cis_trans_terminals.get(n, True)) != new._stereo_cis_trans_terminals.get(m, False): continue - new._atoms_stereo[n] = structure._translate_tetrahedron_sign(n, new._stereo_tetrahedrons[n], s) - - for n, s in structure._allenes_stereo.items(): - if n in stereo_override or n not in new._stereo_allenes or \ - set(new._stereo_allenes[n]) != set(structure._stereo_allenes[n]): - # skip changed allenes + s12 = structure._stereo_cis_trans_terminals[n] + # check if cumulene terminals are the same + if set(n12) != set(s12): continue - t1, t2, *_ = new._stereo_allenes[n] - new._allenes_stereo[n] = structure._translate_allene_sign(n, t1, t2, s) - - for nm, s in structure._cis_trans_stereo.items(): - n, m = nm - if n in stereo_override or m in stereo_override: - continue - env = structure._stereo_cis_trans[nm] - try: - new_env = new._stereo_cis_trans[nm] - except KeyError: - nm = nm[::-1] - try: - new_env = new._stereo_cis_trans[nm] - except KeyError: - continue - t2, t1, *_ = new_env - else: - t1, t2, *_ = new_env - if set(env) != set(new_env): - continue - new._cis_trans_stereo[nm] = structure._translate_cis_trans_sign(n, m, t1, t2, s) + if set(new.stereogenic_cis_trans[n12]) == set(env := structure.stereogenic_cis_trans[s12]): + # connected to cumulenes atoms should be the same + s = new._translate_cis_trans_sign(*n12, *env[:2], sbonds[n][m].stereo) + nbonds[n][m]._stereo = s + # else: ignore label + + if self._fix_rings: + new.kekule() # keeps stereo as is + if 
not new.thiele(fix_tautomers=self._fix_tautomers): # fixes stereo if any ring aromatized + new.fix_stereo() + else: + new.fix_stereo() + return new __all__ = ['BaseReactor'] diff --git a/chython/reactor/reactor.py b/chython/reactor/reactor.py index 08cb024c..ab633227 100644 --- a/chython/reactor/reactor.py +++ b/chython/reactor/reactor.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2024 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # Copyright 2019 Adelia Fatykhova # This file is part of chython. # @@ -62,29 +62,28 @@ def __init__(self, patterns: Tuple[QueryContainer, ...], raise TypeError('invalid params') elif not all(isinstance(x, (QueryContainer, MoleculeContainer)) for x in products): raise TypeError('invalid params') - self.patterns = patterns - self.products = products + self._patterns = patterns + self._products = products - self.__one_shot = one_shot - self.__polymerise_limit = polymerise_limit - self.__products_atoms = tuple(set(m) for m in products) - self.__automorphism_filter = automorphism_filter - super().__init__({n for x in patterns for n, h in x._masked.items() if not h}, reduce(or_, products), - delete_atoms, fix_aromatic_rings, fix_tautomers) + self._one_shot = one_shot + self._polymerise_limit = polymerise_limit + self._products_atoms = tuple(set(m) for m in products) + self._automorphism_filter = automorphism_filter + super().__init__(reduce(or_, patterns), reduce(or_, products), delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, *structures: MoleculeContainer): if any(not isinstance(structure, MoleculeContainer) for structure in structures): raise TypeError('only list of Molecules possible') - len_patterns = len(self.patterns) + len_patterns = len(self._patterns) structures = fix_mapping_overlap(structures) s_nums = set(range(len(structures))) seen = set() - if self.__one_shot: + if self._one_shot: for chosen in permutations(s_nums, len_patterns): ignored = [structures[x] for x in 
s_nums.difference(chosen)] chosen = [structures[x] for x in chosen] - for new in self.__single_stage(chosen, {x for x in ignored for x in x}): + for new in self._single_stage(chosen, {x for x in ignored for x in x}): # store reacted molecules in same order as matched pattern r = ReactionContainer([x.copy() for x in chosen] + [x.copy() for x in ignored], new + [x.copy() for x in ignored]) @@ -100,14 +99,14 @@ def __call__(self, *structures: MoleculeContainer): while queue: chosen, ignored, depth = queue.popleft() depth += 1 - for new in self.__single_stage(chosen, {x for x in ignored for x in x}): + for new in self._single_stage(chosen, {x for x in ignored for x in x}): r = ReactionContainer([x.copy() for x in structures], new + [x.copy() for x in ignored]) if len(new) > 1: r.contract_ions() # try to keep salts if str(r) in seen: continue seen.add(str(r)) - if len(r.products) != len(ignored) + len(self.__products_atoms): + if len(r.products) != len(ignored) + len(self._products_atoms): logger.info('ambiguous multicomponent structures. skip multistage processing') yield r continue @@ -116,7 +115,7 @@ def __call__(self, *structures: MoleculeContainer): else: seen.add(str(r)) - if depth < self.__polymerise_limit: + if depth < self._polymerise_limit: prod = r.products if len_patterns == 1: # simple case. only products or ignored can be transformed. 
for i in range(len(prod)): @@ -128,26 +127,26 @@ def __call__(self, *structures: MoleculeContainer): queue.append((ch, [*prod[:i], *prod[i + 1:]], depth)) yield r - def __single_stage(self, chosen, ignored) -> Iterator[List[MoleculeContainer]]: + def _single_stage(self, chosen, ignored) -> Iterator[List[MoleculeContainer]]: max_ignored_number = united_chosen = None - split = len(self.__products_atoms) > 1 - for match in lazy_product(*(x.get_mapping(y, automorphism_filter=self.__automorphism_filter) for x, y in - zip(self.patterns, chosen))): + split = len(self._products_atoms) > 1 + for match in lazy_product(*(x.get_mapping(y, automorphism_filter=self._automorphism_filter) for x, y in + zip(self._patterns, chosen))): mapping = match[0].copy() for m in match[1:]: mapping.update(m) if united_chosen is None: united_chosen = reduce(or_, chosen) max_ignored_number = max(ignored, default=0) - for new in self._patcher(united_chosen, mapping): - collision = set(new).intersection(ignored) - if collision: - new.remap(dict(zip(collision, count(max(max_ignored_number, max(new)) + 1)))) + new = self._patcher(united_chosen, mapping) + collision = set(new).intersection(ignored) + if collision: + new.remap(dict(zip(collision, count(max(max_ignored_number, max(new)) + 1)))) - if split: - yield new.split() - else: - yield [new] + if split: + yield new.split() + else: + yield [new] def fix_mapping_overlap(structures) -> List[MoleculeContainer]: @@ -159,7 +158,8 @@ def fix_mapping_overlap(structures) -> List[MoleculeContainer]: intersection = set(structure).intersection(checked_atoms) if intersection: mapping = dict(zip(intersection, count(max(max(checked_atoms), max(structure)) + 1))) - structure = structure.remap(mapping, copy=True) + structure = structure.copy() + structure.remap(mapping) logger.info('some atoms in input structures had the same numbers.\n' f'atoms {list(mapping)} were remapped to {list(mapping.values())}') checked_atoms.update(structure) diff --git 
a/chython/reactor/test/__init__.py b/chython/reactor/test/__init__.py new file mode 100644 index 00000000..c8a5a613 --- /dev/null +++ b/chython/reactor/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2022 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# diff --git a/tests/reactor/test_deprotection.py b/chython/reactor/test/test_deprotection.py similarity index 100% rename from tests/reactor/test_deprotection.py rename to chython/reactor/test/test_deprotection.py diff --git a/chython/reactor/test/test_reactor.py b/chython/reactor/test/test_reactor.py new file mode 100644 index 00000000..37b4268e --- /dev/null +++ b/chython/reactor/test/test_reactor.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from chython import smiles, smarts, Reactor +from pytest import mark + + +data = [ + (('[B;D3;x2;z1:4]([O:5])([O:6])-[C;@@;h1:3]1([O;M][C;M]1)', '[Cl,Br,I;D1:1]-[C;a:2]'), ('[A;@:3]-[A:2]',), + ('CC1O[C@@H]1B(O)O', 'Brc1ccccc1'), ('CC1O[C@H]1c1ccccc1',)), # inverse stereo check + (('[B;D3;x2;z1:4]([O:5])([O:6])-[C;@@;h1:3]1([O;M][C;M]1)', '[Cl,Br,I;D1:1]-[C;a:2]'), ('[A;@@:3]-[A:2]',), + ('CC1O[C@@H]1B(O)O', 'Brc1ccccc1'), ('CC1O[C@@H]1c1ccccc1',)), # keep stereo on RC + (('[B;D3;x2;z1:4]([O:5])([O:6])-[C;@@;h1:3]1([O;M][C;M]1)', '[Cl,Br,I;D1:1]-[C;a:2]'), ('[A:3]-[A:2]',), + ('CC1O[C@@H]1B(O)O', 'Brc1ccccc1'), ('CC1OC1c1ccccc1',)), # drop stereo on RC +] + + +@mark.parametrize('patterns, products, source, result', data) +def test_transformer(patterns, products, source, result): + for q, m in zip(patterns, source): + assert smarts(q) <= smiles(m) + + reactor = Reactor([smarts(x) for x in patterns], [smarts(x) for x in products]) + out = {format(smiles(x), 'h') for x in result} + assert {format(x, 'h') for x in next(reactor(*(smiles(x) for x in source))).products} == out diff --git a/tests/reactor/test_scaffold.py b/chython/reactor/test/test_scaffold.py similarity index 100% rename from tests/reactor/test_scaffold.py rename to chython/reactor/test/test_scaffold.py diff --git a/chython/reactor/test/test_transformer.py b/chython/reactor/test/test_transformer.py new file mode 100644 index 00000000..c6b57b9c --- /dev/null +++ b/chython/reactor/test/test_transformer.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from chython import smiles, smarts, Transformer +from pytest import mark + + +data = [ + ('[C:1]Br', '[A:1][O;M]', 'C[C@H](OC)CBr', 'C[C@H](OC)CO'), # keep stereo out of match + ('[C:2][C:1]Br', '[A:2][A:1][O;M]', 'C[C@H](OC)CBr', 'C[C@H](OC)CO'), # keep stereo inside match + ('[C;M][C;@;h1:1]([O;M])[N;M]', '[A;@@:1]', 'CC[C@H](O)N', 'CC[C@@H](O)N'), # inversion of stereo + ('[C:1]Br', '[A:1][O;M]', 'C/C=C/CBr', 'C/C=C/CO'), # keep stereo out of match + ('[C:1]Br', '[A:1][O;M]', 'CC=[C@]=CCBr', 'CC=[C@]=CCO'), # keep + ('[C:1]Br', '[A:1][O;M]', 'CC=[C@]=CBr', 'CC=C=CO'), # drop stereo on RC + ('[C:1]Br', '[A:1][O;M]', 'C/C=C/Br', 'CC=CO'), # drop stereo on RC +] + + +@mark.parametrize('pattern, replacement, source, result', data) +def test_transformer(pattern, replacement, source, result): + transformer = Transformer(smarts(pattern), smarts(replacement)) + + mol = smiles(source) + out = {format(smiles(x), 'h') for x in ([result] if isinstance(result, str) else result)} + assert {format(x, 'h') for x in transformer(mol)} == out diff --git a/chython/reactor/transformer.py b/chython/reactor/transformer.py index d2be81e7..1ca11099 100644 --- a/chython/reactor/transformer.py +++ b/chython/reactor/transformer.py @@ -43,22 +43,20 @@ def __init__(self, pattern: QueryContainer, replacement: Union[MoleculeContainer if not isinstance(pattern, QueryContainer) or not isinstance(replacement, (MoleculeContainer, QueryContainer)): raise TypeError('invalid params') - self.pattern = pattern - self.replacement = replacement - self.__automorphism_filter = automorphism_filter - self.__copy_metadata = 
copy_metadata - super().__init__({n for n, h in pattern._masked.items() if not h}, replacement, delete_atoms, - fix_aromatic_rings, fix_tautomers) + self._pattern = pattern + self._automorphism_filter = automorphism_filter + self._copy_metadata = copy_metadata + super().__init__(pattern, replacement, delete_atoms, fix_aromatic_rings, fix_tautomers) def __call__(self, structure: MoleculeContainer): if not isinstance(structure, MoleculeContainer): raise TypeError('only Molecules possible') - for mapping in self.pattern.get_mapping(structure, automorphism_filter=self.__automorphism_filter): - for transformed in self._patcher(structure, mapping): - if self.__copy_metadata: - transformed.meta.update(structure.meta) - yield transformed + for mapping in self._pattern.get_mapping(structure, automorphism_filter=self._automorphism_filter): + transformed = self._patcher(structure, mapping) + if self._copy_metadata: + transformed.meta.update(structure.meta) + yield transformed __all__ = ['Transformer'] diff --git a/chython/utils/__init__.py b/chython/utils/__init__.py index ff7f58a9..301256ae 100644 --- a/chython/utils/__init__.py +++ b/chython/utils/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. 
# # chython is free software; you can redistribute it and/or modify @@ -18,7 +18,6 @@ # from importlib.util import find_spec from .free_wilson import * -from .functional_groups import * from .grid import * from .retro import * from .svg import * @@ -49,7 +48,7 @@ def w(obj): printing.is_sequence = w -__all__ = ['functional_groups', 'fw_prepare_groups', 'fw_decomposition_tree', +__all__ = ['fw_prepare_groups', 'fw_decomposition_tree', 'grid_depict', 'GridDepict', 'retro_depict', 'RetroDepict', 'svg2png', 'patch_pandas'] diff --git a/chython/utils/free_wilson.py b/chython/utils/free_wilson.py index e836aa6d..9ee415c3 100644 --- a/chython/utils/free_wilson.py +++ b/chython/utils/free_wilson.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2022 Ramil Nugmanov +# Copyright 2022-2024 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -42,31 +42,31 @@ def fw_prepare_groups(core: Union[MoleculeContainer, QueryContainer], molecule: cs = set(core_map.values()) groups = molecule.substructure(molecule._atoms.keys() - cs, recalculate_hydrogens=False) gs = set(groups) - hs = molecule._hydrogens - hgs = groups._hydrogens - plane = molecule._plane cf = molecule.substructure(cs, recalculate_hydrogens=False) - chs = cf._hydrogens for n, m, b in molecule.bonds(): if n in cs: if m in gs: - h = H() - h._Core__isotope = reverse[n] # mark mapping to isotope - groups.add_bond(groups.add_atom(h, xy=plane[n]), m, b.copy()) - hgs[m] = hs[m] # restore H count - - cf.add_bond(cf.add_atom(h.copy(), xy=plane[m]), n, b.copy()) - chs[n] = hs[n] + a = molecule.atom(n) + h = H(x=a.x, y=a.y) + h._isotope = reverse[n] # mark mapping to isotope + groups.add_bond(groups.add_atom(h, _skip_calculation=True), m, b.copy(), _skip_calculation=True) + + a = molecule.atom(m) + h = H(x=a.x, y=a.y) + h._isotope = reverse[n] # mark mapping to isotope + cf.add_bond(cf.add_atom(h, _skip_calculation=True), n, b.copy(), _skip_calculation=True) 
elif m in cs and n in gs: - h = H() - h._Core__isotope = reverse[m] - groups.add_bond(groups.add_atom(h, xy=plane[m]), n, b.copy()) - hgs[n] = hs[n] - - cf.add_bond(cf.add_atom(h.copy(), xy=plane[n]), m, b.copy()) - chs[m] = hs[m] + a = molecule.atom(m) + h = H(x=a.x, y=a.y) + h._isotope = reverse[m] + groups.add_bond(groups.add_atom(h, _skip_calculation=True), n, b.copy(), _skip_calculation=True) + + a = molecule.atom(n) + h = H(x=a.x, y=a.y) + h._isotope = reverse[m] # mark mapping to isotope + cf.add_bond(cf.add_atom(h.copy(), _skip_calculation=True), n, b.copy(), _skip_calculation=True) groups = groups.split() groups.insert(0, cf) return groups diff --git a/chython/utils/functional_groups.py b/chython/utils/functional_groups.py deleted file mode 100644 index e1b7105f..00000000 --- a/chython/utils/functional_groups.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020, 2021 Ramil Nugmanov -# Copyright 2020 Dinar Batyrshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# - - -def functional_groups(molecule, limit): - """ - Generate all connected atom groups up to limit atoms. 
- - :param molecule: MoleculeContainer - :param limit: chain length - :return: list of molecule functional groups - """ - bonds = molecule._bonds - - if limit < 1: - raise ValueError('limit should be >= 1') - - response = [] - groups = set() - stack = [([a], list(n)) for a, n in bonds.items()] - while stack: - aug, nei = stack.pop(0) - for x in nei: - augx = (*aug, x) - if augx not in groups: - groups.add(augx) - response.append(molecule.substructure(augx, as_query=True)) - nt = nei.copy() - nt.remove(x) - nt.extend(list(bonds[x])) - if len(augx) < limit: - stack.append((augx, nt)) - return response - - -__all__ = ['functional_groups'] diff --git a/chython/utils/grid.py b/chython/utils/grid.py index cc15d718..01b5dd90 100644 --- a/chython/utils/grid.py +++ b/chython/utils/grid.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # Copyright 2024 Philippe Gantzer # This file is part of chython. # @@ -52,11 +52,10 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] if clean2d: for m in molecules: if len(m) > 1: - values = m._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() @@ -65,12 +64,12 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] for m in ms: if m is None: break - min_y = min(y for x, y in m._plane.values()) - max_y = max(y for x, y in m._plane.values()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) h = max_y - min_y if row_height < h: # get height of row row_height = h - planes.append(m._plane.copy()) + planes.append([a.xy for _, a in m.atoms()]) max_x = 0. 
for m in ms: @@ -88,8 +87,9 @@ def grid_depict(molecules: List[MoleculeContainer], labels: Optional[List[str]] shift_y -= row_height + 4. * font_size # restore planes - for p, m in zip(planes, molecules): - m._plane = p + for m, p in zip(molecules, planes): + for (_, a), xy in zip(m.atoms(), p): + a.xy = xy _width = shift_x - 1.5 * font_size _height = -shift_y - 1.5 * font_size diff --git a/chython/utils/rdkit.py b/chython/utils/rdkit.py index 826387f6..aa8a17a9 100644 --- a/chython/utils/rdkit.py +++ b/chython/utils/rdkit.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2019-2022 Ramil Nugmanov +# Copyright 2019-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -18,7 +18,6 @@ # from rdkit.Chem import AssignStereochemistry, Atom, BondStereo, BondType, ChiralType, Conformer, RWMol, SanitizeMol from ..containers import MoleculeContainer -from ..exceptions import IsChiral, NotChiral, ValenceError from ..periodictable import Element @@ -27,89 +26,55 @@ def from_rdkit_molecule(data): RDKit molecule object to MoleculeContainer converter """ mol = MoleculeContainer() - parsed_mapping = mol._parsed_mapping - mol_conformers = mol._conformers - bonds = mol._bonds - atoms, mapping = [], [] + mapping = {} tetrahedron_stereo = [] - for a in data.GetAtoms(): - e = Element.from_symbol(a.GetSymbol()) - isotope = a.GetIsotope() - if isotope: - e = e(isotope) - else: - e = e() - atom = {'atom': e, 'charge': a.GetFormalCharge()} - - radical = a.GetNumRadicalElectrons() - if radical: - atom['is_radical'] = True - - atoms.append(atom) - mapping.append(a.GetAtomMapNum()) - tetrahedron_stereo.append(a.GetChiralTag()) - - conformers = [] - c = data.GetConformers() - if c: - for atom, (x, y, _) in zip(atoms, c[0].GetPositions()): - atom['xy'] = (x, y) - for c in c: - if c.Is3D(): - conformers.append(c.GetPositions()) - - new_map = [] - for a, n in zip(atoms, mapping): - a = mol.add_atom(**a) - 
new_map.append(a) - parsed_mapping[a] = n - - stereo = [] + for ra in data.GetAtoms(): + e = Element.from_symbol(ra.GetSymbol()) + a = e(ra.GetIsotope() or None, charge=ra.GetFormalCharge(), is_radical=bool(ra.GetNumRadicalElectrons()), + parsed_mapping=ra.GetAtomMapNum(), implicit_hydrogens=ra.GetNumExplicitHs() + ra.GetNumImplicitHs()) + mapping[ra.GetIdx()] = mol.add_atom(a, _skip_calculation=True) + s = ra.GetChiralTag() + if s in (_chiral_cw, _chiral_ccw): + tetrahedron_stereo.append((ra.GetIdx(), [x.GetIdx() for x in ra.GetNeighbors()], s == _chiral_ccw)) + + cis_trans_stereo = [] for b in data.GetBonds(): - n, m = new_map[b.GetBeginAtomIdx()], new_map[b.GetEndAtomIdx()] - mol.add_bond(n, m, _rdkit_bond_map[b.GetBondType()]) + n, m = mapping[b.GetBeginAtomIdx()], mapping[b.GetEndAtomIdx()] + mol.add_bond(n, m, _rdkit_bond_map[b.GetBondType()], _skip_calculation=True) s = b.GetStereo() - if s == _cis: + if s in (_cis, _trans): nn, nm = b.GetStereoAtoms() - stereo.append((mol.add_cis_trans_stereo, n, m, new_map[nn], new_map[nm], True)) - elif s == _trans: - nn, nm = b.GetStereoAtoms() - stereo.append((mol.add_cis_trans_stereo, n, m, new_map[nn], new_map[nm], False)) - - for n, s in zip(new_map, tetrahedron_stereo): - if s == _chiral_cw: - env = bonds[n] - env = [x for x in new_map if x in env] - stereo.append((mol.add_atom_stereo, n, env, False)) - elif s == _chiral_ccw: - env = bonds[n] - env = [x for x in new_map if x in env] - stereo.append((mol.add_atom_stereo, n, env, True)) - - while stereo: - fail_stereo = [] - old_stereo = len(stereo) - for f, *args in stereo: - try: - f(*args, clean_cache=False) - except NotChiral: - fail_stereo.append((f, *args)) - except IsChiral: - pass - except ValenceError: - mol.flush_cache() - break - else: - stereo = fail_stereo - if len(stereo) == old_stereo: - break - mol.flush_stereo_cache() - continue - break + cis_trans_stereo.append((n, m, mapping[nn], mapping[nm], s == _cis)) - for c in conformers: - 
mol_conformers.append({k: tuple(v) for k, v in zip(new_map, c)}) + if cs := data.GetConformers(): + # set coordinates from the first rdkit conformer. usually it's 2d layout + for (_, atom), (x, y, _) in zip(mol.atoms(), cs[0].GetPositions()): + atom.xy = (x, y) + + conformers = [] + for c in cs: + if c.Is3D(): + conformers.append({n: tuple(v) for n, v in enumerate(c.GetPositions(), 1)}) + if conformers: + mol._conformers = conformers + + # move stereo labels as is + for n, env, s in tetrahedron_stereo: + n = mapping[n] + try: + mol.atom(n)._stereo = mol._translate_tetrahedron_sign(n, [mapping[x] for x in env], s) + except KeyError: + pass + for n, m, nn, nm, s in cis_trans_stereo: + try: + mol.bond(n, m)._stereo = mol._translate_cis_trans_sign(n, m, nn, nm, s) + except KeyError: + pass + + mol.fix_structure(recalculate_hydrogens=False) + if tetrahedron_stereo or cis_trans_stereo: + mol.fix_stereo() return mol @@ -123,11 +88,10 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): """ mol = RWMol() mapping = {} - atoms = data._atoms - bonds = data._bonds for n, a in data.atoms(): ra = Atom(a.atomic_number) + ra.SetNumExplicitHs(a.implicit_hydrogens) if keep_mapping: ra.SetAtomMapNum(n) if a.charge: @@ -138,24 +102,35 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): ra.SetNumRadicalElectrons(1) mapping[n] = mol.AddAtom(ra) + inverted = {v: k for k, v in mapping.items()} + for n, m, b in data.bonds(): - if atoms[n].atomic_symbol not in _inorganic: + if data.atom(n).atomic_symbol not in _inorganic: n, m = m, n # fix direction of dative bond mol.AddBond(mapping[n], mapping[m], _bond_map[b.order]) - for n in data._atoms_stereo: + for n, a in data.atoms(): + if a.stereo is None: + continue + if n not in data.stereogenic_tetrahedrons: + continue # allenes are not supported ra = mol.GetAtomWithIdx(mapping[n]) - env = bonds[n] - s = data._translate_tetrahedron_sign(n, [x for x in mapping if x in env]) + env = [inverted[x.GetIdx()] 
for x in ra.GetNeighbors()] + s = data._translate_tetrahedron_sign(n, env) ra.SetChiralTag(_chiral_ccw if s else _chiral_cw) - for nm, s in data._cis_trans_stereo.items(): - n, m = nm - if m in bonds[n]: # cumulenes unsupported - nn, nm, *_ = data._stereo_cis_trans[nm] - b = mol.GetBondBetweenAtoms(mapping[n], mapping[m]) - b.SetStereoAtoms(mapping[nn], mapping[nm]) - b.SetStereo(_cis if s else _trans) + for n, m, b in data.bonds(): + if b.stereo is None: + continue + # check for simple cis-trans + nm = data._stereo_cis_trans_centers.get(n) + if nm is None or n not in nm or m not in nm: + continue + + n1, m1, *_ = data.stereogenic_cis_trans[nm] + rb = mol.GetBondBetweenAtoms(mapping[n], mapping[m]) + rb.SetStereoAtoms(mapping[n1], mapping[m1]) + rb.SetStereo(_cis if b.stereo else _trans) conf = Conformer() for n, a in data.atoms(): @@ -163,11 +138,12 @@ def to_rdkit_molecule(data: MoleculeContainer, *, keep_mapping=True): conf.Set3D(False) mol.AddConformer(conf, assignId=True) - for c in data._conformers: - conf = Conformer() - for n, xyz in c.items(): - conf.SetAtomPosition(mapping[n], xyz) - mol.AddConformer(conf, assignId=True) + if hasattr(data, '_conformers'): + for c in data._conformers: + conf = Conformer() + for n, xyz in c.items(): + conf.SetAtomPosition(mapping[n], xyz) + mol.AddConformer(conf, assignId=True) SanitizeMol(mol) AssignStereochemistry(mol, flagPossibleStereoCenters=True, force=True) diff --git a/chython/utils/retro.py b/chython/utils/retro.py index d94ec666..cd84b184 100644 --- a/chython/utils/retro.py +++ b/chython/utils/retro.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2023 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # Copyright 2021 Alexander Sizov # This file is part of chython. 
# @@ -66,22 +66,21 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle if clean2d: for m in column: if len(m) > 1: - values = m._plane.values() - min_x = min(x for x, _ in values) - max_x = max(x for x, _ in values) - min_y = min(y for _, y in values) - max_y = max(y for _, y in values) + min_x = min(a.x for _, a in m.atoms()) + max_x = max(a.x for _, a in m.atoms()) + min_y = min(a.y for _, a in m.atoms()) + max_y = max(a.y for _, a in m.atoms()) if max_y - min_y < .01 and max_x - min_x < 0.01: m.clean2d() - heights = [max(y for _, y in m._plane.values()) - min(y for _, y in m._plane.values()) for m in column] + heights = [max(a.y for _, a in m.atoms()) - min(a.y for _, a in m.atoms()) for m in column] y_shift = sum(heights) + y_gap * (len(heights) - 1) # column height with gaps if y_shift > c_max_y: c_max_y = y_shift y_shift /= 2. # center align for m, h in zip(column, heights): - plane = m._plane.copy() # backup + plane = [a.xy for _, a in m.atoms()] # backup mx = m._fix_plane_min(x_shift, -y_shift) if mx > c_max_x: c_max_x = mx @@ -92,7 +91,8 @@ def retro_depict(tree: Tree, *, y_gap=3., x_gap=5., width=None, height=None, cle y_shift -= h + y_gap render.append(m.depict(_embedding=True)[:5]) - m._plane = plane # restore + for (_, a), xy in zip(m.atoms(), plane): # restore + a.xy = xy x_shift = c_max_x + x_gap # between columns gap last_layer = current_layer diff --git a/chython/utils/test/__init__.py b/chython/utils/test/__init__.py new file mode 100644 index 00000000..031c963a --- /dev/null +++ b/chython/utils/test/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# diff --git a/chython/utils/test/test_rdkit.py b/chython/utils/test/test_rdkit.py new file mode 100644 index 00000000..5af40af0 --- /dev/null +++ b/chython/utils/test/test_rdkit.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . 
+# +from chython import smiles +from chython.utils.rdkit import * +from pytest import mark +from rdkit import Chem +from rdkit.Chem import AllChem + + +data = [ + 'CCO', + 'C/C=C/C', + 'C[C@H](O)CC', + 'C\C=C/O[C@@H]1OC[C@@H](Oc2ccccc2)[C@@H](O)[C@H]1O\C=C\C', + '[nH]1cccc1', + 'C\C=C\C=C', + 'C[C@@H](O)[C@H](O)[C@H](C)O' +] + +@mark.parametrize('source', data) +def test_to_rdkit(source): + mol = smiles(source) + rd_mol = to_rdkit_molecule(mol, keep_mapping=False) + rd_mol_mapping = to_rdkit_molecule(mol, keep_mapping=True) + + assert format(smiles(Chem.MolToSmiles(rd_mol)), 'h') == format(mol, 'h') + assert format(smiles(Chem.MolToSmiles(rd_mol_mapping)), 'm') == format(mol, 'm') + + +@mark.parametrize('source', data) +def test_from_rdkit(source): + assert format(from_rdkit_molecule(Chem.MolFromSmiles(source)), 'h') == format(smiles(source), 'h') + + +def test_coordinates(): + rd_mol = to_rdkit_molecule(smiles('CCO'), keep_mapping=False) + + AllChem.Compute2DCoords(rd_mol) + mol = from_rdkit_molecule(rd_mol) + assert any(a.x for _, a in mol.atoms()) + + rd_mol_h = Chem.AddHs(rd_mol) + AllChem.EmbedMolecule(rd_mol_h) + rd_mol_nh = Chem.RemoveHs(rd_mol_h) + + mol = from_rdkit_molecule(rd_mol_nh) + assert hasattr(mol, '_conformers') + assert isinstance(mol._conformers, list) + assert len(mol._conformers) == 1 + assert isinstance(mol._conformers[0], dict) + assert len(mol._conformers[0]) == 3 + assert all(tuple(x) for x in mol._conformers[0].values()) + assert all(len(x) == 3 for x in mol._conformers[0].values()) + assert all(isinstance(x, float) for x in mol._conformers[0].values() for x in x) + assert any(x for x in mol._conformers[0].values() for x in x) diff --git a/pyproject.toml b/pyproject.toml index 155f88ec..825bd447 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'chython' -version = '1.81' +version = '2.0' description = 'Library for processing molecules and reactions in python way' authors = ['Ramil Nugmanov '] license 
= 'LGPLv3' diff --git a/tests/data/MR.rdf b/test/MR.rdf similarity index 100% rename from tests/data/MR.rdf rename to test/MR.rdf diff --git a/tests/data/arenes.sdf b/test/arenes.sdf similarity index 100% rename from tests/data/arenes.sdf rename to test/arenes.sdf diff --git a/tests/data/ch.xyz b/test/ch.xyz similarity index 100% rename from tests/data/ch.xyz rename to test/ch.xyz diff --git a/tests/data/cycle.sdf b/test/cycle.sdf similarity index 100% rename from tests/data/cycle.sdf rename to test/cycle.sdf diff --git a/tests/data/depict.sdf b/test/depict.sdf similarity index 100% rename from tests/data/depict.sdf rename to test/depict.sdf diff --git a/tests/data/hbonds.sdf b/test/hbonds.sdf similarity index 100% rename from tests/data/hbonds.sdf rename to test/hbonds.sdf diff --git a/tests/data/heterocycles_charges.smi b/test/heterocycles_charges.smi similarity index 100% rename from tests/data/heterocycles_charges.smi rename to test/heterocycles_charges.smi diff --git a/tests/data/implicit.mrv b/test/implicit.mrv similarity index 100% rename from tests/data/implicit.mrv rename to test/implicit.mrv diff --git a/tests/data/implicit.sdf b/test/implicit.sdf similarity index 100% rename from tests/data/implicit.sdf rename to test/implicit.sdf diff --git a/tests/data/ions.rdf b/test/ions.rdf similarity index 100% rename from tests/data/ions.rdf rename to test/ions.rdf diff --git a/tests/data/isomorphism.sdf b/test/isomorphism.sdf similarity index 100% rename from tests/data/isomorphism.sdf rename to test/isomorphism.sdf diff --git a/tests/data/mcs.sdf b/test/mcs.sdf similarity index 100% rename from tests/data/mcs.sdf rename to test/mcs.sdf diff --git a/tests/data/morgan_ruiner.sdf b/test/morgan_ruiner.sdf similarity index 100% rename from tests/data/morgan_ruiner.sdf rename to test/morgan_ruiner.sdf diff --git a/tests/data/peptide.sdf b/test/peptide.sdf similarity index 100% rename from tests/data/peptide.sdf rename to test/peptide.sdf diff --git 
a/tests/data/reaction_centerslist.rdf b/test/reaction_centerslist.rdf similarity index 100% rename from tests/data/reaction_centerslist.rdf rename to test/reaction_centerslist.rdf diff --git a/tests/data/standardize.rdf b/test/standardize.rdf similarity index 100% rename from tests/data/standardize.rdf rename to test/standardize.rdf diff --git a/tests/data/standardize.sdf b/test/standardize.sdf similarity index 100% rename from tests/data/standardize.sdf rename to test/standardize.sdf diff --git a/tests/data/stereo.sdf b/test/stereo.sdf similarity index 100% rename from tests/data/stereo.sdf rename to test/stereo.sdf diff --git a/tests/algorithms/fingerprints/test_linear.py b/tests/algorithms/fingerprints/test_linear.py deleted file mode 100644 index fa39950e..00000000 --- a/tests/algorithms/fingerprints/test_linear.py +++ /dev/null @@ -1,156 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from chython import smiles -import numpy as np -import pytest - - -def test_linear_fingerprint_basic(): - # Test basic fingerprint generation - mol = smiles('CCO') - fp = mol.linear_fingerprint(min_radius=1, max_radius=2, length=1024) - - # Test array properties - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (1024,) - - # Test binary nature - assert set(np.unique(fp)).issubset({0, 1}) - - -def test_linear_fingerprint_consistency(): - # Test that fingerprints are consistent for the same molecule - mol = smiles('CCO') - fp1 = mol.linear_fingerprint() - fp2 = mol.linear_fingerprint() - - # Test exact equality of arrays - np.testing.assert_array_equal(fp1, fp2) - - # Test different molecules give different fingerprints - mol2 = smiles('CCC') - fp3 = mol2.linear_fingerprint() - assert not np.array_equal(fp1, fp3) - - -def test_linear_fingerprint_parameters(): - mol = smiles('CCO') - - # Test different radius parameters - fp1 = mol.linear_fingerprint(min_radius=1, max_radius=2) - fp2 = mol.linear_fingerprint(min_radius=1, max_radius=3) - assert fp2.sum() >= fp1.sum() # More radius should capture more features - - # Test different lengths - fp3 = mol.linear_fingerprint(length=2048) - assert fp3.shape == (2048,) - assert isinstance(fp3, np.ndarray) - assert fp3.dtype == np.uint8 - - # Test number of active bits - fp4 = mol.linear_fingerprint(number_active_bits=3) - assert fp4.sum() >= fp1.sum() # More active bits should set more bits - - -def test_linear_fingerprint_bit_pairs(): - # Test the number_bit_pairs parameter - mol = smiles('CCCC') # molecule with multiple similar fragments - - # Compare different number_bit_pairs settings - fp1 = mol.linear_fingerprint(number_bit_pairs=1) - fp2 = mol.linear_fingerprint(number_bit_pairs=2) - fp3 = mol.linear_fingerprint(number_bit_pairs=4) - - # More bit pairs should potentially activate more bits - assert fp1.sum() <= fp2.sum() <= fp3.sum() - - -def 
test_linear_fingerprint_complex_molecule(): - # Test with a more complex molecule - mol = smiles('c1ccccc1CC(=O)O') - fp = mol.linear_fingerprint() - - # Basic checks - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - - # Should have reasonable number of bits set - assert 0 < fp.sum() < len(fp) # some bits should be set, but not all - - # Test with different parameters - fp_large = mol.linear_fingerprint(max_radius=6, length=2048) - assert fp_large.shape == (2048,) - assert fp_large.sum() > 0 - - -def test_linear_fingerprint_edge_cases(): - # Test single atom - mol_single = smiles('C') - fp_single = mol_single.linear_fingerprint() - assert isinstance(fp_single, np.ndarray) - assert fp_single.dtype == np.uint8 - assert fp_single.sum() > 0 # should have some bits set - - # Test disconnected components - mol_disconnected = smiles('CC.CC') - fp_disconnected = mol_disconnected.linear_fingerprint() - assert isinstance(fp_disconnected, np.ndarray) - assert fp_disconnected.dtype == np.uint8 - - -def test_linear_fingerprint_arbitrary_length(): - # Test that non-power-of-2 lengths work but might have unexpected behavior - mol = smiles('CCO') - lengths = [100, 1000, 1500, 3000] - - for length in lengths: - fp = mol.linear_fingerprint(length=length) - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (length,) - # The actual bits set might be fewer than expected due to masking - assert 0 <= fp.sum() <= length - - -def test_linear_fingerprint_comparison(): - # Test fingerprint comparison between similar molecules - mol1 = smiles('CCO') - mol2 = smiles('CCC') - mol3 = smiles('CCCO') - - fp1 = mol1.linear_fingerprint() - fp2 = mol2.linear_fingerprint() - fp3 = mol3.linear_fingerprint() - - # Calculate Tanimoto similarities - def tanimoto(a, b): - intersection = np.sum(np.logical_and(a, b)) - union = np.sum(np.logical_or(a, b)) - return intersection / union if union > 0 else 0.0 - - # Similar molecules should have higher 
similarity - sim12 = tanimoto(fp1, fp2) - sim13 = tanimoto(fp1, fp3) - sim23 = tanimoto(fp2, fp3) - - assert 0 <= sim12 <= 1 - assert 0 <= sim13 <= 1 - assert 0 <= sim23 <= 1 \ No newline at end of file diff --git a/tests/algorithms/fingerprints/test_morgan.py b/tests/algorithms/fingerprints/test_morgan.py deleted file mode 100644 index c6407e9f..00000000 --- a/tests/algorithms/fingerprints/test_morgan.py +++ /dev/null @@ -1,308 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from chython import smiles, ReactionContainer -import numpy as np -from pytest import mark -import pytest - - -def test_morgan_fingerprint(): - # Test basic fingerprint generation - mol = smiles('CCO') - fp = mol.morgan_fingerprint(min_radius=1, max_radius=2, length=1024) - - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (1024,) - assert fp.sum() > 0 # Should have some bits set - - # Test different lengths - fp2 = mol.morgan_fingerprint(length=2048) - assert fp2.shape == (2048,) - - # Test different number of active bits - fp3 = mol.morgan_fingerprint(number_active_bits=3) - assert fp3.sum() >= fp.sum() # Should have more or equal bits set - - -def test_morgan_bit_set(): - mol = smiles('CCO') - bits = mol.morgan_bit_set(min_radius=1, max_radius=2, length=1024) - - assert isinstance(bits, set) - assert len(bits) > 0 - assert all(isinstance(x, int) for x in bits) - assert all(0 <= x < 1024 for x in bits) - - # Test with different parameters - bits2 = mol.morgan_bit_set(length=2048, number_active_bits=3) - assert all(0 <= x < 2048 for x in bits2) - assert len(bits2) >= len(bits) # Should have more or equal bits - - -def test_morgan_hash_set(): - mol = smiles('CCO') - hashes = mol.morgan_hash_set(min_radius=1, max_radius=2) - - assert isinstance(hashes, set) - assert len(hashes) > 0 - assert all(isinstance(x, int) for x in hashes) - - -def test_morgan_hash_smiles(): - mol = smiles('CCO') - hash_smiles = mol.morgan_hash_smiles(min_radius=1, max_radius=2) - - assert isinstance(hash_smiles, dict) - assert len(hash_smiles) > 0 - assert all(isinstance(k, int) for k in hash_smiles) - assert all(isinstance(v, list) for v in hash_smiles.values()) - assert all(isinstance(s, str) for v in hash_smiles.values() for s in v) - - -def test_morgan_smiles_hash(): - mol = smiles('CCO') - smiles_hash = mol.morgan_smiles_hash(min_radius=1, max_radius=2) - - assert isinstance(smiles_hash, dict) - assert len(smiles_hash) > 0 - assert 
all(isinstance(k, str) for k in smiles_hash) - assert all(isinstance(v, list) for v in smiles_hash.values()) - assert all(isinstance(h, int) for v in smiles_hash.values() for h in v) - - -@mark.parametrize('radius', [(0, 1), (1, 0), (-1, 2)]) -def test_invalid_radius(radius): - mol = smiles('CCO') - min_r, max_r = radius - try: - mol.morgan_fingerprint(min_radius=min_r, max_radius=max_r) - assert False, "Should raise AssertionError" - except AssertionError: - pass - - -def test_complex_molecule(): - # Test with a more complex molecule containing rings and multiple atom types - mol = smiles('c1ccccc1CC(=O)O') - - fp1 = mol.morgan_fingerprint(min_radius=1, max_radius=3) - fp2 = mol.morgan_fingerprint(min_radius=1, max_radius=4) - - assert fp1.sum() < fp2.sum() # More radius should capture more features - - # Test hash consistency - hash_set1 = mol.morgan_hash_set(min_radius=1, max_radius=2) - hash_set2 = mol.morgan_hash_set(min_radius=1, max_radius=2) - assert hash_set1 == hash_set2 # Should be deterministic - - -def test_morgan_fingerprint_numpy(): - # Test numpy array properties of Morgan fingerprints - mol = smiles('CCO') - fp = mol.morgan_fingerprint(min_radius=1, max_radius=2, length=1024) - - # Test array type and shape - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (1024,) - - # Test binary nature - assert set(np.unique(fp)).issubset({0, 1}) - - # Test different lengths - fp_2048 = mol.morgan_fingerprint(length=2048) - assert fp_2048.shape == (2048,) - assert fp_2048.dtype == np.uint8 - - # Test different number of active bits - fp_more_bits = mol.morgan_fingerprint(number_active_bits=4) - assert fp_more_bits.sum() >= fp.sum() - - -def test_morgan_fingerprint_consistency(): - # Test that fingerprints are consistent for the same molecule - mol = smiles('CCO') - fp1 = mol.morgan_fingerprint() - fp2 = mol.morgan_fingerprint() - - # Test exact equality of arrays - np.testing.assert_array_equal(fp1, fp2) - - # Test different 
molecules give different fingerprints - mol2 = smiles('CCC') - fp3 = mol2.morgan_fingerprint() - assert not np.array_equal(fp1, fp3) - - -def test_morgan_fingerprint_parameters(): - mol = smiles('CCO') - - # Test different radius parameters - fp1 = mol.morgan_fingerprint(min_radius=1, max_radius=2) - fp2 = mol.morgan_fingerprint(min_radius=1, max_radius=3) - assert fp2.sum() >= fp1.sum() # More radius should capture more features - - # Test power of 2 lengths - for length in [128, 256, 512, 1024, 2048, 4096]: - fp = mol.morgan_fingerprint(length=length) - assert fp.shape == (length,) - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - - -def test_morgan_fingerprint_arbitrary_length(): - # Test that non-power-of-2 lengths work but might have unexpected behavior - mol = smiles('CCO') - lengths = [100, 1000, 1500, 3000] - - for length in lengths: - fp = mol.morgan_fingerprint(length=length) - assert isinstance(fp, np.ndarray) - assert fp.dtype == np.uint8 - assert fp.shape == (length,) - # The actual bits set might be fewer than expected due to masking - assert 0 <= fp.sum() <= length - - -def test_complex_molecule_clean2d(): - # Test with more complex molecule - mol = smiles('c1ccccc1CC(=O)O') - mol.clean2d() - - # Check coordinates exist - assert all(n in mol._plane for n in mol) - - # Verify ring atoms are roughly coplanar - ring_atoms = [n for n in mol if len(mol._bonds[n]) == 2] - if ring_atoms: - coords = [mol._plane[n] for n in ring_atoms] - # Calculate variance in y coordinates - should be small for planar ring - y_coords = [y for _, y in coords] - y_mean = sum(y_coords) / len(y_coords) - y_variance = sum((y - y_mean) ** 2 for y in y_coords) / len(y_coords) - assert y_variance < 1.0 # reasonable threshold for planarity - - -def test_disconnected_components(): - # Test molecule with multiple disconnected components - mol = smiles('CCO.c1ccccc1') - mol.clean2d() - - # Check all atoms have coordinates - assert all(n in mol._plane for n in mol) - - 
# Components should be separated in space - components = list(mol.connected_components) - assert len(components) == 2 - - # Get bounding boxes for each component - def get_bounds(atoms): - xs = [mol._plane[n][0] for n in atoms] - ys = [mol._plane[n][1] for n in atoms] - return min(xs), max(xs), min(ys), max(ys) - - bounds1 = get_bounds(components[0]) - bounds2 = get_bounds(components[1]) - - # Check components don't overlap in x-direction - assert bounds1[1] < bounds2[0] or bounds2[1] < bounds1[0] - - -def test_reaction_clean2d(): - # Create a simple reaction - reactant = smiles('CCO') - product = smiles('CC=O') - reaction = ReactionContainer([reactant], [product]) - - # Clean coordinates - reaction.clean2d() - - # Check that all molecules have coordinates - for molecule in reaction.molecules(): - assert all(n in molecule._plane for n in molecule) - - # Check that reactants are positioned before products - reactant_max_x = max(x for mol in reaction.reactants - for x, _ in mol._plane.values()) - product_min_x = min(x for mol in reaction.products - for x, _ in mol._plane.values()) - assert reactant_max_x < product_min_x - - # Check arrow exists and is positioned between reactants and products - assert hasattr(reaction, '_arrow') - arrow_start, arrow_end = reaction._arrow - assert reactant_max_x < arrow_start < arrow_end < product_min_x - - -def test_reaction_with_reagents(): - # Create reaction with reagents - reactant = smiles('CCO') - reagent = smiles('Cl') - product = smiles('CCCl') - reaction = ReactionContainer([reactant], [reagent], [product]) - - reaction.clean2d() - - # Check all molecules have coordinates - for molecule in reaction.molecules(): - assert all(n in molecule._plane for n in molecule) - - # Check reagents are positioned above the arrow - reagent_coords = [(x, y) for mol in reaction.reagents - for x, y in mol._plane.values()] - assert all(y > 0 for _, y in reagent_coords) # reagents should be above - - # Verify arrow position - arrow_start, 
arrow_end = reaction._arrow - assert arrow_start < arrow_end - - # Check relative positioning - reactant_max_x = max(x for mol in reaction.reactants - for x, _ in mol._plane.values()) - product_min_x = min(x for mol in reaction.products - for x, _ in mol._plane.values()) - assert reactant_max_x < arrow_start < arrow_end < product_min_x - - -def test_fix_positions(): - # Test just the position fixing functionality - reaction = ReactionContainer([smiles('CCO')], [smiles('CC=O')]) - - # Clean individual molecules first - for mol in reaction.molecules(): - mol.clean2d() - - # Then fix positions - reaction.fix_positions() - - # Check arrow exists - assert hasattr(reaction, '_arrow') - - # Check molecules are properly spaced - reactant_coords = [(x, y) for mol in reaction.reactants - for x, y in mol._plane.values()] - product_coords = [(x, y) for mol in reaction.products - for x, y in mol._plane.values()] - - # Verify no overlap between reactants and products - reactant_max_x = max(x for x, _ in reactant_coords) - product_min_x = min(x for x, _ in product_coords) - assert reactant_max_x < product_min_x \ No newline at end of file diff --git a/tests/algorithms/mapping/test_mapping.py b/tests/algorithms/mapping/test_mapping.py deleted file mode 100644 index b77c823e..00000000 --- a/tests/algorithms/mapping/test_mapping.py +++ /dev/null @@ -1,128 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -from chython import smiles - - -def test_basic_mapping(): - # Test basic atom mapping in simple molecules - mol1 = smiles('CC(=O)O') # acetic acid - mol2 = smiles('CC(=O)O') # acetic acid - - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) # all atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_substructure_mapping(): - # Test mapping of a substructure - mol = smiles('CC(=O)OC') # methyl acetate - substructure = smiles('CC(=O)O') # acetic acid pattern - - mappings = list(substructure.get_mapping(mol)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(substructure) # all substructure atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_multiple_mappings(): - # Test cases where multiple valid mappings exist - mol = smiles('CC(=O)CC(=O)C') # 2,4-pentanedione - pattern = smiles('CC(=O)C') # acetone pattern - - mappings = list(pattern.get_mapping(mol)) - assert len(mappings) > 1 # should find multiple matches - assert all(len(m) == len(pattern) for m in mappings) # each mapping should cover all pattern atoms - - -def test_aromatic_mapping(): - # Test mapping with aromatic systems - benzene = smiles('c1ccccc1') - toluene = smiles('Cc1ccccc1') - - mappings = list(benzene.get_mapping(toluene)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(benzene) # all benzene atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, 
v in mapping.items()) - - -def test_reaction_mapping(): - # Test mapping in reaction context - reactant = smiles('CC(=O)O') # acetic acid - product = smiles('CC(=O)OC') # methyl acetate - - mappings = list(reactant.get_mapping(product)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(reactant) # all reactant atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_complex_mapping(): - # Test mapping with complex molecules - mol1 = smiles('CC1=C(C(=O)C2=C(C1=O)N3CC4=C(C3(CC2)C)NC5=CC=CC=C54)C') # complex structure - mol2 = smiles('CC1=C(C(=O)C2=C(C1=O)N3CC4=C(C3(CC2)C)NC5=CC=CC=C54)C') # same structure - - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) # all atoms should be mapped - assert all(isinstance(k, int) and isinstance(v, int) for k, v in mapping.items()) - - -def test_mapping_with_different_bonds(): - # Test mapping when bond orders differ - mol1 = smiles('C=CC=C') # 1,3-butadiene - mol2 = smiles('C=CC=C') # 1,3-butadiene - - # Should find mapping for identical molecules - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) - - # Verify that the mapping preserves atom connectivity and bond orders - for atom1, atom2 in mapping.items(): - # Check that the number of neighbors is the same - assert len(mol1._bonds[atom1]) == len(mol2._bonds[atom2]) - # Check that bond orders are preserved - mol1_orders = {mol1._bonds[atom1][x].order for x in mol1._bonds[atom1]} - mol2_orders = {mol2._bonds[atom2][x].order for x in mol2._bonds[atom2]} - assert mol1_orders == mol2_orders - - -def test_mapping_with_charges(): - # Test mapping with charged atoms - mol1 = 
smiles('C[NH3+]') # methylammonium - mol2 = smiles('C[NH3+]') # methylammonium - - # Should find mapping for identical molecules - mappings = list(mol1.get_mapping(mol2)) - assert len(mappings) > 0 # at least one mapping should exist - mapping = mappings[0] # take first mapping - assert len(mapping) == len(mol1) - - # Verify that the mapping preserves atom connectivity and charges - for atom1, atom2 in mapping.items(): - assert mol1._bonds[atom1].keys() == mol2._bonds[atom2].keys() - assert mol1._charges[atom1] == mol2._charges[atom2] \ No newline at end of file diff --git a/tests/algorithms/test_smiles.py b/tests/algorithms/test_smiles.py deleted file mode 100644 index 6e54a022..00000000 --- a/tests/algorithms/test_smiles.py +++ /dev/null @@ -1,160 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . 
-# -from chython import smiles - - -def test_basic_smiles(): - # Test basic SMILES generation - mol = smiles('CCO') # ethanol - assert 'C' in str(mol) and 'O' in str(mol) # check presence of atoms - - mol = smiles('c1ccccc1') # benzene - assert 'c1ccccc1' in str(mol) # aromatic representation - - -def test_format_options(): - # Test different format options - mol = smiles('c1ccccc1') - - # Test asymmetric closures - assert mol.__format__('a').startswith('c') - - # Test disable stereo - chiral_mol = smiles('C[C@H](O)CC') - assert '@' not in chiral_mol.__format__('!s') - - # Test aromatic bonds - kekulized = mol.__format__('A') - assert 'c' not in kekulized # should not contain aromatic atoms - - # Test atom mapping - assert ':' in mol.__format__('m') # atom mapping numbers present - - # Test random ordering - mol_str = str(mol) - random_smiles = mol.__format__('r') - assert len(random_smiles) > 0 # valid SMILES generated - - -def test_smiles_atoms_order(): - # Test atoms order property - mol = smiles('CCO') - order = mol.smiles_atoms_order - assert isinstance(order, tuple) - assert len(order) == 3 # number of atoms - assert all(isinstance(x, int) for x in order) - - -def test_molecule_smiles(): - # Test MoleculeSmiles specific functionality - mol = smiles('CCO') - atoms = list(mol._atoms.keys()) # get actual atom indices - - # Test sticky smiles generation - sticky = mol.sticky_smiles(atoms[0]) # fix first atom - assert sticky and isinstance(sticky, str) - - # Test sticky smiles with both ends - sticky_both = mol.sticky_smiles(atoms[0], atoms[-1]) # fix first and last atoms - assert sticky_both and isinstance(sticky_both, str) - - -def test_complex_structures(): - # Test complex molecular structures - mol = smiles('C1CC(=O)NC(=O)C1') # cyclic peptide - assert all(x in str(mol) for x in ('C', 'N', '=O')) # check for expected fragments - - mol = smiles('C[C@H](N)C(=O)O') # amino acid - assert '@' in str(mol) # stereo information preserved - - -def 
test_charged_species(): - # Test charged molecules - mol = smiles('[NH4+]') # ammonium - assert '+' in str(mol) - - mol = smiles('[OH-]') # hydroxide - assert '-' in str(mol) - - -def test_radical_species(): - # Test radical species - mol = smiles('[CH3]') - assert '[' in str(mol) and ']' in str(mol) # bracketed form - - # Test with format options - assert '[' in mol.__format__('h') # show hydrogens - - -def test_cgr_smiles(): - # Test CGR SMILES functionality - mol = smiles('CC>>CCC') # dynamic transformation - assert '>' in str(mol) - - # Test dynamic bonds - mol = smiles('C=C>>CC') - assert '=' in str(mol) - - -def test_query_smiles(): - # Test basic query atoms - mol = smiles('[C]') # carbon atom - assert len(mol) == 1 - - mol = smiles('[N]') # nitrogen atom - assert len(mol) == 1 - - mol = smiles('[O]') # oxygen atom - assert len(mol) == 1 - - mol = smiles('[H]') # hydrogen atom - assert len(mol) == 1 - - -def test_smiles_comparison(): - # Test SMILES comparison functionality - mol1 = smiles('CCO') - mol2 = smiles('CCO') - mol3 = smiles('CCC') - - assert mol1 == mol2 # same molecules - assert mol1 != mol3 # different molecules - assert hash(mol1) == hash(mol2) # same hash for same molecules - - -def test_cxsmiles_extensions(): - # Test CXSMILES extensions - mol = smiles('[CH3]') # radical - assert mol.smiles # valid SMILES generated - - # Test without CXSMILES - assert mol.__format__('!x') # valid SMILES without extensions - - -def test_special_cases(): - # Test special cases and edge cases - mol = smiles('[H][H]') # hydrogen molecule - assert '[H]' in str(mol) - - mol = smiles('C#N') # triple bond - assert '#' in str(mol) - - mol = smiles('C~C') # any bond - assert '~' in str(mol) \ No newline at end of file diff --git a/tests/containers/test_query.py b/tests/containers/test_query.py deleted file mode 100644 index 78d8e3f7..00000000 --- a/tests/containers/test_query.py +++ /dev/null @@ -1,174 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil 
Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. -# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -import pytest -from chython.containers.query import QueryContainer -from chython.containers.bonds import QueryBond -from chython.periodictable import QueryElement - - -def test_query_container_basic(): - # Test basic container creation and atom addition - qc = QueryContainer() - - # Add atoms with different input types - n1 = qc.add_atom('C') # from symbol - assert isinstance(qc._atoms[n1], QueryElement) - - n2 = qc.add_atom(7) # from atomic number (N) - assert isinstance(qc._atoms[n2], QueryElement) - - n3 = qc.add_atom(QueryElement.from_symbol('O')()) # from QueryElement - assert isinstance(qc._atoms[n3], QueryElement) - - -def test_query_container_neighbors(): - # Test neighbors validation and storage - qc = QueryContainer() - - # Test valid neighbors - n1 = qc.add_atom('C', neighbors=2) # single value - assert qc._neighbors[n1] == (2,) - - n2 = qc.add_atom('C', neighbors=[1, 2, 3]) # list of values - assert qc._neighbors[n2] == (1, 2, 3) - - # Test invalid neighbors - with pytest.raises(ValueError): - qc.add_atom('C', neighbors=-1) # negative value - - with pytest.raises(ValueError): - qc.add_atom('C', neighbors=15) # value too large - - with pytest.raises(ValueError): - qc.add_atom('C', neighbors=[1, 1]) # duplicate values - - -def 
test_query_container_hybridization(): - # Test hybridization validation and storage - qc = QueryContainer() - - # Test valid hybridization - n1 = qc.add_atom('C', hybridization=1) # sp3 - assert qc._hybridizations[n1] == (1,) - - n2 = qc.add_atom('C', hybridization=[1, 2]) # sp3 and sp2 - assert qc._hybridizations[n2] == (1, 2) - - # Test invalid hybridization - with pytest.raises(ValueError): - qc.add_atom('C', hybridization=0) # invalid value - - with pytest.raises(ValueError): - qc.add_atom('C', hybridization=5) # invalid value - - -def test_query_container_rings(): - # Test ring size validation and storage - qc = QueryContainer() - - # Test valid ring sizes - n1 = qc.add_atom('C', rings_sizes=3) # 3-membered ring - assert qc._rings_sizes[n1] == (3,) - - n2 = qc.add_atom('C', rings_sizes=[5, 6]) # 5 and 6-membered rings - assert qc._rings_sizes[n2] == (5, 6) - - # Test invalid ring sizes - with pytest.raises(ValueError): - qc.add_atom('C', rings_sizes=2) # too small - - with pytest.raises(ValueError): - qc.add_atom('C', rings_sizes=[5, 5]) # duplicate values - - -def test_query_container_bonds(): - # Test bond addition and validation - qc = QueryContainer() - n1 = qc.add_atom('C') - n2 = qc.add_atom('C') - - # Add bond with different input types - qc.add_bond(n1, n2, 1) # from int (single bond) - assert isinstance(qc._bonds[n1][n2], QueryBond) - - qc = QueryContainer() - n1 = qc.add_atom('C') - n2 = qc.add_atom('C') - qc.add_bond(n1, n2, (1, 2)) # from tuple (single or double bond) - assert isinstance(qc._bonds[n1][n2], QueryBond) - - -def test_query_container_copy(): - # Test container copying - qc = QueryContainer() - n1 = qc.add_atom('C', neighbors=2, hybridization=1) - n2 = qc.add_atom('N', rings_sizes=6) - qc.add_bond(n1, n2, 1) - - # Make a copy - copy = qc.copy() - - # Verify all attributes are copied - assert copy._neighbors == qc._neighbors - assert copy._hybridizations == qc._hybridizations - assert copy._rings_sizes == qc._rings_sizes - assert 
len(copy._bonds) == len(qc._bonds) - - -def test_query_container_union(): - # Test container union - qc1 = QueryContainer() - n1 = qc1.add_atom('C', neighbors=2) - n2 = qc1.add_atom('O') - qc1.add_bond(n1, n2, 1) - - qc2 = QueryContainer() - n3 = qc2.add_atom('N', rings_sizes=5) - n4 = qc2.add_atom('C') - qc2.add_bond(n3, n4, 2) - - # Create union with remapping to avoid collisions - union = qc1.union(qc2, remap=True) - - # Verify union properties - assert len(union._atoms) == 4 # total number of atoms - assert len(union._bonds) == 4 # each bond is stored twice (bidirectional) - assert sum(len(bonds) for bonds in union._bonds.values()) == 4 # total number of bond entries - assert len(union._neighbors) == len(qc1._neighbors) + len(qc2._neighbors) - assert len(union._rings_sizes) == len(qc1._rings_sizes) + len(qc2._rings_sizes) - - -def test_query_container_enumerate(): - # Test query enumeration - qc = QueryContainer() - n1 = qc.add_atom('C') - # Add N and O separately to test enumeration - n2 = qc.add_atom('N') - n3 = qc.add_atom('O') - qc.add_bond(n1, n2, (1, 2)) # single or double bond - qc.add_bond(n1, n3, 1) # single bond - - # Enumerate all possible combinations - queries = list(qc.enumerate_queries()) - assert len(queries) >= 2 # at least 2 combinations due to bond types - - # Test with mark enumeration - queries = list(qc.enumerate_queries(enumerate_marks=True)) - assert len(queries) >= 2 # should include mark combinations \ No newline at end of file diff --git a/tests/files/daylight/test_tokenize.py b/tests/files/daylight/test_tokenize.py deleted file mode 100644 index b5b42d34..00000000 --- a/tests/files/daylight/test_tokenize.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2025 Ramil Nugmanov -# Copyright 2025 Tagir Akhmetshin -# This file is part of chython. 
-# -# chython is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with this program; if not, see . -# -import pytest -from chython.exceptions import IncorrectSmiles -from chython.files.daylight.parser import parser -from chython.files.daylight.tokenize import smiles_tokenize, smarts_tokenize - - -def test_smiles_tokenize_atoms(): - # Test basic atom tokenization - tokens = list(smiles_tokenize('C')) - assert len(tokens) == 1 - assert isinstance(tokens[0], tuple) - assert len(tokens[0]) == 2 - assert isinstance(tokens[0][1], dict) - assert tokens[0][1].get('element') == 'C' - - -def test_smiles_tokenize_bonds(): - # Test bond tokenization - tokens = list(smiles_tokenize('C=O')) - assert len(tokens) == 3 - assert tokens[1][0] == 1 # bond index - assert tokens[1][1] == 2 # double bond - - -def test_smiles_tokenize_branches(): - # Test branch tokenization - tokens = list(smiles_tokenize('C(O)N')) - assert len(tokens) == 5 - assert tokens[1][0] == 2 # branch start index - assert tokens[3][0] == 3 # branch end index - - -def test_smiles_tokenize_cycles(): - # Test cycle tokenization - tokens = list(smiles_tokenize('C1CCC1')) - assert len(tokens) == 6 - assert tokens[1][0] == 6 # cycle number - - -def test_smiles_tokenize_charges(): - # Test charge tokenization - tokens = list(smiles_tokenize('[NH4+]')) - assert len(tokens) == 1 # NH4+ as a single token - assert tokens[0][1].get('charge') == 1 # positive charge - assert tokens[0][1].get('element') 
== 'N' # nitrogen - assert tokens[0][1].get('hydrogen') == 4 # 4 hydrogens - - -def test_smarts_tokenize_basic(): - # Test basic SMARTS tokenization - tokens = list(smarts_tokenize('[C]')) - assert len(tokens) == 1 # just C - assert tokens[0][1].get('element') == 'C' - - -def test_smarts_tokenize_bonds(): - # Test bond primitives - tokens = list(smarts_tokenize('CC')) - assert len(tokens) == 2 # C, C - assert tokens[0][1].get('element') == 'C' - assert tokens[1][1].get('element') == 'C' - - -# Special cases test commented out due to unpredictable behavior -# def test_tokenize_special_cases(): -# # Test empty string -# with pytest.raises(IncorrectSmiles, match='invalid smiles'): -# list(smiles_tokenize('')) # empty string should raise IncorrectSmiles -# -# # Test whitespace -# with pytest.raises(IncorrectSmiles, match='invalid smiles'): -# list(smiles_tokenize(' ')) # whitespace should raise IncorrectSmiles