From b57110f30fed216beab6cb69b20a31b359bbeec4 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Aug 2025 09:38:13 +0200 Subject: [PATCH 1/2] fixed unpacking --- chython/containers/__init__.py | 4 ++-- chython/containers/_unpack_v0v2.pyx | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chython/containers/__init__.py b/chython/containers/__init__.py index e89bba91..ff3dd48c 100644 --- a/chython/containers/__init__.py +++ b/chython/containers/__init__.py @@ -25,11 +25,11 @@ from .reaction import * -def unpach(data: bytes, /, *, compressed=True) -> Union[MoleculeContainer, ReactionContainer]: +def unpach(data: bytes, /, *, compressed=True, skip_labels_calculation=False) -> Union[MoleculeContainer, ReactionContainer]: if compressed: data = decompress(data) try: - return MoleculeContainer.unpack(data, compressed=False) + return MoleculeContainer.unpack(data, compressed=False, skip_labels_calculation=skip_labels_calculation) except ValueError: pass # second try diff --git a/chython/containers/_unpack_v0v2.pyx b/chython/containers/_unpack_v0v2.pyx index 6baef160..049b8d56 100644 --- a/chython/containers/_unpack_v0v2.pyx +++ b/chython/containers/_unpack_v0v2.pyx @@ -118,6 +118,7 @@ def unpack(const unsigned char[::1] data not None): atomic_number = b & 0x7f py_atom = object.__new__(elements[atomic_number]) + py_atom._extended_stereo = None py_n = n py_atoms[py_n] = py_atom py_bonds[py_n] = {} From ea6e2ed1457e0d3fb70cacd8e2b62fbc440ac0ff Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 21:15:44 +0200 Subject: [PATCH 2/2] SSSR cython (#59) SSSR reimplemented on cython with x10-100 speedup depends on mol size --- build.py | 3 + chython/algorithms/_rings.pyx | 477 +++++++++++++++++++++++++ chython/algorithms/aromatics/thiele.py | 7 +- chython/algorithms/rings.py | 375 +------------------ pyproject.toml | 2 +- 5 files changed, 488 insertions(+), 376 deletions(-) create mode 100644 chython/algorithms/_rings.pyx diff --git a/build.py b/build.py index 6f97641d..2884f5c9 100644 --- a/build.py +++ b/build.py @@ -56,6 +56,9 @@ extra_compile_args=extra_compile_args), Extension('chython.files._xyz', ['chython/files/_xyz.pyx'], + extra_compile_args=extra_compile_args), + Extension('chython.algorithms._rings', + ['chython/algorithms/_rings.pyx'], extra_compile_args=extra_compile_args) ] diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx new file mode 100644 index 00000000..c86a2f28 --- /dev/null +++ b/chython/algorithms/_rings.pyx @@ -0,0 +1,477 @@ +# cython: undeclared_check_usage=error +# cython: warn.undeclared=True +# cython: warn.unused=True +# cython: warn.unused_arg=True +# cython: warn.maybe_uninitialized=True +# cython: boundscheck=False +# cython: wraparound=False + +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc +from libc.limits cimport USHRT_MAX +from libc.string cimport memset, memcpy + + +cdef struct paths_t: + # store PID units. paths store only non-terminal nodes + # in the case of distances equals 1 - don't store any paths + unsigned short distance # bonds count on the shortest path between nodes + unsigned char num_pid0 # shortest paths count + unsigned char num_pid1 # shortest+1 paths count + unsigned short **pid0 # array of pointers to the shortest paths arrays + unsigned short **pid1 # array of pointers to the shortest+1 paths arrays + + +cdef struct dist_matrix_t: + unsigned short n_nodes + unsigned short *mapping # idx to node label mapping + paths_t *data + + +cdef struct ring_t: + unsigned short n_nodes + unsigned short *nodes + unsigned long long *hash + + +cdef struct rings_t: + unsigned short n_rings + unsigned short n_allocated + unsigned short n_reserved + unsigned short hash_size + ring_t *rings + + +cdef dist_matrix_t *alloc_dist_matrix(unsigned short n_nodes): + cdef size_t i, total = n_nodes * n_nodes + cdef dist_matrix_t *matrix = PyMem_Malloc(sizeof(dist_matrix_t)) + + matrix.n_nodes = n_nodes + matrix.mapping = PyMem_Malloc(n_nodes * sizeof(unsigned short)) + matrix.data = PyMem_Malloc(total * sizeof(paths_t)) + # set in paths_t all attrs to 0/NULL + memset(matrix.data, 0, total * sizeof(paths_t)) + + # reset distances to max + for i in range(total): + matrix.data[i].distance = USHRT_MAX + return matrix + + +cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): + cdef size_t i, n1 = n_rings + 1 + cdef rings_t *rings = PyMem_Malloc(sizeof(rings_t)) + + rings.n_rings = n_rings + rings.n_allocated = 0 + rings.n_reserved = n1 + rings.hash_size = hash_size + rings.rings = PyMem_Malloc(n1 * sizeof(ring_t)) + memset(rings.rings, 0, n1 * sizeof(ring_t)) + + for i in range(n1): + rings.rings[i].n_nodes = USHRT_MAX + return rings + + +cdef void realloc_rings(rings_t *rings, size_t n_rings): + cdef size_t i + rings.rings = PyMem_Realloc(rings.rings, n_rings * sizeof(ring_t)) + memset(rings.rings + rings.n_reserved, 0, (n_rings - rings.n_reserved) * sizeof(ring_t)) + + for i in range(rings.n_reserved, n_rings): + rings.rings[i].n_nodes = USHRT_MAX + rings.n_reserved = n_rings + + +cdef void free_pid0(paths_t *paths): + cdef size_t i + if paths.num_pid0 == 0: return + for i in range(paths.num_pid0): + PyMem_Free(paths.pid0[i]) + PyMem_Free(paths.pid0) + paths.num_pid0 = 0 + paths.pid0 = NULL + + +cdef void free_pid1(paths_t *paths): + cdef size_t i + if paths.num_pid1 == 0: return + for i in range(paths.num_pid1): + PyMem_Free(paths.pid1[i]) + PyMem_Free(paths.pid1) + paths.num_pid1 = 0 + paths.pid1 = NULL + + +cdef void free_pids(paths_t *paths): + free_pid0(paths) + free_pid1(paths) + + +cdef void free_dist_matrix(dist_matrix_t *matrix): + cdef size_t i, total = matrix.n_nodes * matrix.n_nodes + for i in range(total): + free_pids(&matrix.data[i]) + PyMem_Free(matrix.mapping) + PyMem_Free(matrix.data) + PyMem_Free(matrix) + + +cdef void free_ring(ring_t *ring): + PyMem_Free(ring.hash) + PyMem_Free(ring.nodes) + ring.hash = NULL + ring.nodes = NULL + + +cdef void free_rings(rings_t *rings): + cdef size_t i + for i in range(rings.n_allocated): + free_ring(&rings.rings[i]) + PyMem_Free(rings.rings) + PyMem_Free(rings) + + +cdef void move_paths(paths_t *paths): + free_pid1(paths) + + paths.num_pid1 = paths.num_pid0 + paths.pid1 = paths.pid0 + paths.num_pid0 = 0 + paths.pid0 = NULL + + +cdef unsigned short *concatenate_paths(paths_t *paths_ik, paths_t *paths_kj, unsigned short k): + cdef unsigned short *path + cdef size_t size1, size2 + + path = PyMem_Malloc((paths_ik.distance + paths_kj.distance - 1) * sizeof(unsigned short)) + + size1 = paths_ik.distance - 1 + size2 = paths_kj.distance - 1 + path[size1] = k # put node k to form continuous i-k-j path excluding i,j terminals + if size1: + memcpy(path, paths_ik.pid0[0], size1 * sizeof(unsigned short)) # copy nodes between i-k + if size2: + memcpy(path + paths_ik.distance, paths_kj.pid0[0], size2 * sizeof(unsigned short)) + return path + + +cdef void append_pid0(paths_t *paths_ij, paths_t *paths_ik, paths_t *paths_kj, unsigned short k): + cdef size_t i = paths_ij.num_pid0 + paths_ij.num_pid0 += 1 + paths_ij.pid0 = PyMem_Realloc(paths_ij.pid0, paths_ij.num_pid0 * sizeof(unsigned short *)) + paths_ij.pid0[i] = concatenate_paths(paths_ik, paths_kj, k) + + +cdef void append_pid1(paths_t *paths_ij, paths_t *paths_ik, paths_t *paths_kj, unsigned short k): + cdef size_t i = paths_ij.num_pid1 + paths_ij.num_pid1 += 1 + paths_ij.pid1 = PyMem_Realloc(paths_ij.pid1, paths_ij.num_pid1 * sizeof(unsigned short *)) + paths_ij.pid1[i] = concatenate_paths(paths_ik, paths_kj, k) + + +cdef tuple ring_to_tuple(ring_t *ring): + cdef size_t i + cdef list result = [] + + for i in range(ring.n_nodes): + result.append(ring.nodes[i]) + return tuple(result) + + +cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, unsigned short k, int test_pid1): + cdef size_t i, j + cdef unsigned short n1, n2 + cdef unsigned short *path + + if paths_ik.distance == 1: + n1 = k + else: + n1 = paths_ik.pid0[0][0] + if paths_kj.distance == 1: + n2 = k + else: + n2 = paths_kj.pid0[0][paths_kj.distance - 2] + + j = paths_ij.distance - 2 + for i in range(paths_ij.num_pid0): + path = paths_ij.pid0[i] + if path[0] == n1 or path[j] == n2: + return 0 + if test_pid1: + j = paths_ij.distance - 1 + for i in range(paths_ij.num_pid1): + path = paths_ij.pid1[i] + if path[0] == n1 or path[j] == n2: + return 0 + return 1 + + +cdef unsigned long long *build_hash(unsigned short *path1, unsigned short *path2, + unsigned short i, unsigned short j, size_t size1, size_t size2, size_t hash_size): + cdef size_t k + cdef unsigned short n + cdef unsigned long long *hash = PyMem_Malloc(hash_size * sizeof(unsigned long long)) + + memset(hash, 0, hash_size * sizeof(unsigned long long)) + hash[i // 64] |= ( 1 << (i % 64)) # add linker node + hash[j // 64] |= ( 1 << (j % 64)) + + for k in range(size1): + n = path1[k] + hash[n // 64] |= ( 1 << (n % 64)) + for k in range(size2): + n = path2[k] + hash[n // 64] |= ( 1 << (n % 64)) + return hash + + +cdef unsigned short *build_ring(unsigned short *path1, unsigned short *path2, + unsigned short i, unsigned short j, size_t size1, size_t size2): + cdef size_t k, s = size1 + 1 + cdef unsigned short *nodes + + nodes = PyMem_Malloc((size1 + size2 + 2) * sizeof(unsigned short)) + nodes[0] = i + nodes[s] = j + memcpy(nodes + 1, path1, size1 * sizeof(unsigned short)) + + # inverse second path + for k in range(size2 - 1, -1, -1): + s += 1 + nodes[s] = path2[k] + return nodes + + +cdef void push_ring(ring_t *ring, rings_t *rings, unsigned short rank): + cdef size_t i + cdef unsigned short ext + + if rings.n_allocated == rings.n_reserved - 1: # almost all slots are used + ext = rings.n_reserved # double rings storage + if ext > 1000: ext = 1000 # no more than 1k rings to extend + realloc_rings(rings, rings.n_reserved + ext) + + for i in range(rings.n_allocated, rank, -1): + rings.rings[i] = rings.rings[i - 1] + + # insert the new ring at the rank position + rings.rings[rank] = ring[0] # copy dereferenced struct memory + rings.n_allocated += 1 + + +cdef unsigned short get_rank(ring_t *ring, rings_t *rings): + cdef size_t i, j + cdef int hash_match + cdef ring_t *other + + for i in range( rings.n_allocated + 1): + other = &rings.rings[i] + if ring.n_nodes == other.n_nodes: + hash_match = 1 + for j in range(rings.hash_size): + if ring.hash[j] != other.hash[j]: + hash_match = 0 + break + if hash_match: + return USHRT_MAX # duplicate found + elif ring.n_nodes < other.n_nodes: # always true for n_allocated+1 + return i + + +cdef void build_pid(dist_matrix_t *matrix): + cdef size_t i, j, k, sk, si + cdef unsigned short rk + cdef unsigned int d + cdef paths_t *paths_kj + cdef paths_t *paths_ij + cdef paths_t *paths_ik + + for k in range(matrix.n_nodes): + sk = k * matrix.n_nodes + rk = matrix.mapping[k] + for i in range(matrix.n_nodes): + if i == k: continue + si = i * matrix.n_nodes + paths_ik = &matrix.data[si + k] + if paths_ik.distance == USHRT_MAX: continue + + for j in range(matrix.n_nodes): + if j == k or j == i: continue + paths_kj = &matrix.data[sk + j] + if paths_kj.distance == USHRT_MAX: continue + + paths_ij = &matrix.data[si + j] + d = paths_ik.distance + paths_kj.distance + if d < paths_ij.distance: # shorter pid0 path found + if d == paths_ij.distance - 1 and has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 0): + move_paths(paths_ij) + else: # drop old paths + free_pids(paths_ij) + + paths_ij.distance = d + append_pid0(paths_ij, paths_ik, paths_kj, rk) + elif d == paths_ij.distance: # new pid0 path + if has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 0): + append_pid0(paths_ij, paths_ik, paths_kj, rk) + elif d == paths_ij.distance + 1: # new pid1 path + if has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 1): + append_pid1(paths_ij, paths_ik, paths_kj, rk) + + +cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): + cdef size_t i, j, k, si, n_max = 0 + cdef unsigned short ri, rj, d, rank + cdef ring_t ring + cdef paths_t *path + cdef rings_t *rings + + for i in range(matrix.n_nodes): + ri = matrix.mapping[i] + if ri > n_max: n_max = ri + + rings = alloc_rings(n_rings, (n_max + 64 - 1) // 64) + + for i in range(matrix.n_nodes): + si = i * matrix.n_nodes + ri = matrix.mapping[i] + for j in range(matrix.n_nodes): + if i == j: continue + path = &matrix.data[si + j] + d = path.distance # make shortcut + if d == USHRT_MAX: continue # different components + rj = matrix.mapping[j] + + if d == 1: # triangles + if not path.num_pid1: continue + ring.n_nodes = 3 + for k in range(path.num_pid1): + ring.hash = build_hash(NULL, path.pid1[k], ri, rj, 0, 1, rings.hash_size) + + rank = get_rank(&ring, rings) + if rank != USHRT_MAX: + ring.nodes = build_ring(path.pid1[k], NULL, ri, rj, 1, 0) + push_ring(&ring, rings, rank) + else: + PyMem_Free(ring.hash) + elif path.num_pid0 == 1: # is odd? + if not path.num_pid1: continue + ring.n_nodes = 2 * d + 1 + for k in range(path.num_pid1): + ring.hash = build_hash(path.pid0[0], path.pid1[k], ri, rj, d - 1, d, rings.hash_size) + + rank = get_rank(&ring, rings) + if rank != USHRT_MAX: + ring.nodes = build_ring(path.pid0[0], path.pid1[k], ri, rj, d - 1, d) + push_ring(&ring, rings, rank) + else: + PyMem_Free(ring.hash) + else: # is even + ring.n_nodes = 2 * d + d -= 1 + for k in range(1, path.num_pid0): + ring.hash = build_hash(path.pid0[k - 1], path.pid0[k], ri, rj, d, d, rings.hash_size) + + rank = get_rank(&ring, rings) + if rank != USHRT_MAX: + ring.nodes = build_ring(path.pid0[k - 1], path.pid0[k], ri, rj, d, d) + push_ring(&ring, rings, rank) + else: + PyMem_Free(ring.hash) + return rings + + +cdef void filter_rings(rings_t *rings): + cdef size_t i, j, k, pc = 0 + cdef ring_t *ring + cdef int is_unique + cdef unsigned long long *hash + cdef unsigned short *poplist + + if rings.n_allocated == rings.n_rings: return + + hash = PyMem_Malloc(rings.hash_size * sizeof(unsigned long long)) + poplist = PyMem_Malloc(rings.n_allocated * sizeof(unsigned short)) + memcpy(hash, rings.rings[0].hash, rings.hash_size * sizeof(unsigned long long)) + + for i in range(1, rings.n_allocated): + is_unique = 0 + ring = &rings.rings[i] + for j in range(rings.hash_size): + if ring.hash[j] & (~hash[j]): + is_unique = 1 + break + if is_unique: # extend global hash + for j in range(rings.hash_size): + hash[j] |= ring.hash[j] + else: + poplist[pc] = i + pc += 1 + + for i in range(rings.n_allocated - 1, rings.n_rings - 1, -1): + pc -= 1 + k = poplist[pc] + + free_ring(&rings.rings[k]) # drop condensed ring + for j in range(k, i): + rings.rings[j] = rings.rings[j + 1] + + rings.n_allocated = rings.n_rings + PyMem_Free(poplist) + PyMem_Free(hash) + + +def sssr(object graph, size_t n_rings): + cdef size_t si + cdef unsigned short i, n, m, n_nodes = len(graph) + cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb + cdef rings_t *rings + cdef dist_matrix_t *matrix + cdef object mb + cdef list output = [] + + if not n_rings: return output + + if n_nodes > 65500: raise ValueError('Too many atoms') + matrix = alloc_dist_matrix(n_nodes) + for i, n in enumerate(graph): + if n > 65500: raise ValueError('Atom index too large') + matrix.mapping[i] = n + reverse_mapping[n] = i + + for n, mb in graph.items(): + si = n_nodes * reverse_mapping[n] + for m in mb: + matrix.data[si + reverse_mapping[m]].distance = 1 + + # run PID matrix calculation + build_pid(matrix) + # run CSET calculation + rings = build_cset(matrix, n_rings) + free_dist_matrix(matrix) + # filter out condensed rings + filter_rings(rings) + + for i in range(rings.n_rings): + output.append(ring_to_tuple(&rings.rings[i])) + + free_rings(rings) + return output diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index c6682247..3c42fc45 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,7 +19,8 @@ from collections import defaultdict from typing import TYPE_CHECKING from ._rules import freak_rules -from ..rings import _sssr, _connected_components +from .._rings import sssr +from ..rings import _connected_components if TYPE_CHECKING: @@ -186,7 +187,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: n_sssr = sum(len(x) for x in rings.values()) // 2 - len(rings) + len(_connected_components(rings)) if not n_sssr: return False - rings = _sssr(rings, n_sssr) # search rings again + rings = sssr(rings, n_sssr) # search rings again seen = set() for ring in rings: diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index f7dc58de..8f5f7ccd 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -18,10 +18,8 @@ # from collections import defaultdict, deque from functools import cached_property -from itertools import combinations -from operator import itemgetter -from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union -from ..exceptions import ImplementationError +from typing import Any, Dict, List, Set, Tuple, Union, TYPE_CHECKING +from ._rings import sssr if TYPE_CHECKING: @@ -45,7 +43,7 @@ def sssr(self) -> List[Tuple[int, ...]]: :return rings atoms numbers """ if self.rings_count: - return _sssr(self.not_special_connectivity, self.rings_count) + return sssr(_skin_graph(self.not_special_connectivity), self.rings_count) return [] @cached_property @@ -148,17 +146,6 @@ def rings_graph(self: 'MoleculeContainer'): return bonds -def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> List[Tuple[int, ...]]: - """ - Smallest Set of Smallest Rings of any adjacency matrix. - Number of rings required. - """ - bonds = _skin_graph(bonds) - paths = _bfs(bonds) - pid1, pid2, dist = _make_pid(paths) - return _rings_filter(_c_set(pid1, pid2, dist), n_sssr) - - def _connected_components(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> List[Set[int]]: atoms = set(bonds) components = [] @@ -192,360 +179,4 @@ def _skin_graph(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> Dict[int, return bonds -def _bfs(bonds): - atoms = set(bonds) - terminated = [] - tail = atoms.pop() - next_stack = {x: [tail, x] for x in bonds[tail]} - - while True: - next_front = set() - found_odd = set() - stack, next_stack = next_stack, {} - for tail, path in stack.items(): - neighbors = bonds[tail] & atoms - next_front.add(tail) - - if len(neighbors) == 1: - n = neighbors.pop() - if n in found_odd: - if len(path) != 1: - terminated.append(tuple(path)) # save second ring closure - next_stack[n] = [n] # maybe we have another path? - else: - path.append(n) - if n in stack: # odd rings - found_odd.add(tail) - terminated.append(tuple(path)) # found ring closure. save path. - elif n in next_stack: # even rings - terminated.append(tuple(path)) - if len(next_stack[n]) != 1: # prevent bicycle case - terminated.append(tuple(next_stack[n])) - next_stack[n] = [n] - else: - next_stack[n] = path # grow must go on - elif neighbors: - if len(path) != 1: - terminated.append(tuple(path)) # save path. - for n in neighbors: - if n in found_odd: - if n in stack: - if n in next_stack: - del next_stack[n] - else: - next_stack[n] = [n] - else: - path = [tail, n] - if n in stack: # odd rings - found_odd.add(tail) - terminated.append(tuple(path)) - elif n in next_stack: # even rings - terminated.append(tuple(path)) - if len(next_stack[n]) != 1: # prevent bicycle case - terminated.append(tuple(next_stack[n])) - next_stack[n] = [n] - else: - next_stack[n] = path - - atoms.difference_update(next_front) - if not atoms: - break - elif not next_stack: - tail = atoms.pop() - next_stack = {x: [tail, x] for x in bonds[tail] & atoms} - return terminated - - -def _make_pid(paths: List[List[int]]): - pid1 = defaultdict(lambda: defaultdict(dict)) - pid2 = defaultdict(lambda: defaultdict(dict)) - distances = defaultdict(lambda: defaultdict(lambda: 1e9)) - chains = sorted(paths, key=len) - for c in chains: - di = len(c) - 1 - n, m = c[0], c[-1] - nn, mm = c[1], c[-2] - if n in distances and m in distances[n] and distances[n][m] != di: - pid2[n][m][(nn, mm)] = c - pid2[m][n][(mm, nn)] = c[::-1] - else: - pid1[n][m][(nn, mm)] = c - pid1[m][n][(mm, nn)] = c[::-1] - distances[n][m] = distances[m][n] = di - - for k in pid1: - new_distances = defaultdict(dict) - dk = distances[k] - ndk = new_distances[k] - for i in pid1: - if i == k: - continue - di = distances[i] - ndi = new_distances[i] - ndk[i] = ndi[k] = di[k] - for j in pid1: - if j == k or j == i: - continue - ij = di[j] - ikj = di[k] + dk[j] - if ij - ikj == 1: # A new shortest path == previous shortest path - 1 - pid2[i][j] = pid1[i][j] - pid1[i][j] = {(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())} - ndi[j] = ikj - elif ij > ikj: # A new shortest path - pid2[i][j] = {} - pid1[i][j] = {(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())} - ndi[j] = ikj - elif ij == ikj: # Another shortest path - pid1[i][j].update({(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())}) - ndi[j] = ij - elif ikj - ij == 1: # Shortest+1 path - pid2[i][j].update({(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())}) - ndi[j] = ij - else: - ndi[j] = ij - distances = new_distances - return pid1, pid2, distances - - -def _c_set(pid1, pid2, pid1l): - c_set = [] - seen = set() - for i, p1i in pid1.items(): - seen.add(i) - di = pid1l[i] - p2i = pid2[i] - - for j, p1ij in p1i.items(): - if j in seen: - continue - p1ij = list(p1ij.values()) - p2ij = list(p2i[j].values()) - dij = di[j] * 2 - - if len(p1ij) == 1: # one shortest - if not p2ij: # need shortest + 1 path - continue - c_set.append((dij + 1, p1ij, p2ij)) - elif not p2ij: # one or more odd rings - c_set.append((dij, p1ij, None)) - else: # odd and even rings found (e.g. bicycle) - c_set.append((dij, p1ij, None)) - c_set.append((dij + 1, p1ij, p2ij)) - - for c_num, p1ij, p2ij in sorted(c_set, key=itemgetter(0)): - if c_num % 2: # odd rings - for c1 in p1ij: - for c2 in p2ij: - c = c1 + c2[-2:0:-1] - if len(c) == len(set(c)): - yield _canonic_ring(c) - else: - for c1, c2 in zip(p1ij, p1ij[1:]): - c = c1 + c2[-2:0:-1] - if len(c) == len(set(c)): - yield _canonic_ring(c) - - -def _canonic_ring(ring: Tuple[int, ...]) -> Tuple[int, ...]: - n = min(ring) - ndx = ring.index(n) - if ndx == 0: - if ring[-1] < ring[1]: - return n, *ring[:0:-1] - return ring - elif ndx == len(ring) - 1: - if ring[0] > ring[-2]: - return ring[::-1] - return n, *ring[:-1] - if ring[ndx + 1] > ring[ndx - 1]: - return *ring[ndx::-1], *ring[:ndx:-1] - return *ring[ndx:], *ring[:ndx] - - -def _ring_scissors(ring: Tuple[int, ...], n: int, m: int) -> Tuple[int, ...]: - ndx = ring.index(n) - mdx = ring.index(m) - if ndx == 0: - if mdx == 1: - return n, *ring[:0:-1] - return ring - elif ndx == len(ring) - 1: - if mdx == 0: - return ring[::-1] - return n, *ring[:-1] - if ndx < mdx: - return *ring[ndx::-1], *ring[:ndx:-1] - return *ring[ndx:], *ring[:ndx] - - -def _ring_adjacency(ring: Tuple[int, ...]) -> Dict[int, List[int]]: - adj = {ring[0]: [ring[-1]]} # ring adjacency matrix - for n, m in zip(ring, ring[1:]): - adj[n].append(m) - adj[m] = [n] - adj[m].append(ring[0]) - return adj - - -def _is_condensed_ring(c, sssr, seen_rings): - # create graph of connected neighbour rings - ck = seen_rings[c] - neighbors = {x: set() for x in sssr if len(seen_rings[x].keys() & ck.keys()) > 1} - if len(neighbors) > 1: - for (i, iv), (j, jv) in combinations(neighbors.items(), 2): - if len(seen_rings[i].keys() & seen_rings[j].keys()) > 1: - iv.add(j) - jv.add(i) - # check if hold rings is combination of existing. (123654) is combo of (1254) and (2365) - # - # 1--2--3 - # | | | - # 4--5--6 - # - # modified NX.dfs_labeled_edges - # https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.\ - # traversal.depth_first_search.dfs_labeled_edges.html - depth_limit = len(neighbors) - 1 - for start, nbrs in neighbors.items(): - if not nbrs: - continue - stack = [(start, seen_rings[start], depth_limit, iter(nbrs), {start})] - while stack: - parent, p_adj, depth_now, children, seen = stack[-1] - try: - child = next(children) - except StopIteration: - stack.pop() - else: - if child not in seen: - common = p_adj.keys() & seen_rings[child].keys() - if len(common) > 2: # only terminal common atoms required - term = {n for n in common if len(common.intersection(p_adj[n])) == 1} - if len(term) != 2: # skip multiple contacts - continue - common.difference_update(term) - n, m = term - mc = _canonic_ring( - (*_ring_scissors(tuple(x for x in parent if x not in common), n, m), - *_ring_scissors(tuple(x for x in child if x not in common), m, n)[1:-1])) - elif len(common) == 2: - n, m = common - mc = _canonic_ring((*_ring_scissors(parent, n, m), *_ring_scissors(child, m, n)[1:-1])) - else: # point connections - continue - if c == mc: # macrocycle found - return True - elif depth_now and 2 < len(mc) <= len(c) + 1: - stack.append((mc, _ring_adjacency(mc), depth_now - 1, iter(neighbors[child]), - {child} | seen)) - return False - - -def _get_unique_chord(ring: Tuple[int, ...], common: Set[int]) -> Optional[Tuple[int, ...]]: - lc = len(common) - if len(ring) == lc: - if common == set(ring): - return () - else: - if common == set(ring[:lc]): - return *ring[lc - 1:], ring[0] - for _ in range(len(ring) - 1): - ring = (*ring[1:], ring[0]) - if common == set(ring[:lc]): - return *ring[lc - 1:], ring[0] - - -def _connected_rings(rings, seen_rings): - rings = rings.copy() - out = [] - for i in range(len(rings)): - c = rings[i] - ck = seen_rings[c] - for j in range(i + 1, len(rings)): - r = rings[j] - rk = seen_rings[r] - common = rk.keys() & ck.keys() - if len(common) == 2: # one common bond - n, m = common - if m in ck[n] and m in rk[n]: # only common bond! - c = _canonic_ring((*_ring_scissors(c, n, m), *_ring_scissors(r, m, n)[1:-1])) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - elif len(common) > 2: - cc = _get_unique_chord(c, common) - if cc is None: # skip multitouched rings - continue - r = _get_unique_chord(r, common) - if r is None: - continue - if cc: - if r: - if r[0] == cc[0]: - r = r[::-1] - c = _canonic_ring((*cc, *r[1:-1])) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - else: - c = _canonic_ring(cc) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - elif r: - c = _canonic_ring(r) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - else: # isolated ring[s] found - out.append(c) - return out - - -def _rings_filter(rings, n_sssr): - c = next(rings) - if n_sssr == 1: - return [c] - - seen_rings = {c} - sssr_atoms = set(c) - sssr = [c] - hold = [] - for c in rings: - if c in seen_rings: - continue - seen_rings.add(c) - if sssr_atoms.issuperset(c): # potentially condensed ring - hold.append(c) - continue - sssr_atoms.update(c) - sssr.append(c) - if len(sssr) == n_sssr: - return sssr - - # now we have set of plug rings (cuban fullerene), besiege rings and condensed trash - seen_rings = {c: _ring_adjacency(c) for c in seen_rings} # prepare adjacency - condensed_rings = _connected_rings(sssr, seen_rings) # collection of contours of condensed rings - - for c in hold: - if c in condensed_rings or _is_condensed_ring(c, sssr, seen_rings): - continue - condensed_rings.insert(0, c) - condensed_rings = _connected_rings(condensed_rings, seen_rings) - sssr.append(c) - if len(sssr) == n_sssr: - return sorted(sssr, key=len) - - raise ImplementationError('SSSR count not reached') - - __all__ = ['Rings'] diff --git a/pyproject.toml b/pyproject.toml index cd333010..ad15a8ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'chython' -version = '2.6' +version = '2.7' description = 'Library for processing molecules and reactions in python way' authors = ['Ramil Nugmanov '] license = 'LGPLv3'