From 2e27fb4a35bc6706170f9ddcc4950e5f744b53b5 Mon Sep 17 00:00:00 2001 From: stsouko Date: Sun, 27 Apr 2025 23:00:09 +0200 Subject: [PATCH 01/16] reimplemented pid calculation --- chython/algorithms/rings.py | 50 +++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index f7dc58de..37f5cbb7 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -548,4 +548,54 @@ def _rings_filter(rings, n_sssr): raise ImplementationError('SSSR count not reached') +def _make_pids(graph): + dist = {} + pid0 = {} + pid1 = {} + for n, bs in graph.items(): + dist[n] = dn = {m: 999_999_999 for m in graph} + pid0[n] = pn = {} + pid1[n] = {} + for m in bs: + dn[m] = 1 + pn[m] = [(n, m)] + + for k in graph: + dk = dist[k] + pk = pid0[k] + for i in graph: + if i == k: continue + if (dki := dk[i]) == 999_999_999: continue + di = dist[i] + p0i = pid0[i] + p1i = pid1[i] + p0ik = p0i[k][0][:-1] # pick only one path and drop atom K + + for j in graph: + if j == k or j == i: continue + if (dkj := dk[j]) == 999_999_999: continue + if (dij := di[j]) > (d := dki + dkj): + p = p0ik + pk[j][0] + if dij == d + 1: + pij = p0i[j] + if not any(x[1] == p[1] or x[-2] == p[-2] for x in pij): + p1i[j] = pij # move into pid1 + else: + p1i[j] = [] # init pid1 + di[j] = d + p0i[j] = [p] # override old paths + elif dij == d: + pij = p0i[j] + p = p0ik + pk[j][0] + if any(x[1] == p[1] or x[-2] == p[-2] for x in pij): continue + pij.append(p) + elif dij == d - 1: + p = p0ik + pk[j][0] + if any(x[1] == p[1] or x[-2] == p[-2] for x in p0i[j]): continue + pij = p1i[j] + if any(x[1] == p[1] or x[-2] == p[-2] for x in pij): continue + pij.append(p) + return pid0, pid1 + + __all__ = ['Rings'] From ffd8245b28c6bdf4ed1bca841624ca689cc4c4ff Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 26 Jul 2025 17:13:31 +0200 Subject: [PATCH 02/16] make pids reimplemented on cython to speedup --- build.py | 3 + chython/algorithms/_rings.pyx | 196 ++++++++++++++++++++++++++++++++++ chython/algorithms/rings.py | 50 --------- 3 files changed, 199 insertions(+), 50 deletions(-) create mode 100644 chython/algorithms/_rings.pyx diff --git a/build.py b/build.py index 6f97641d..2884f5c9 100644 --- a/build.py +++ b/build.py @@ -56,6 +56,9 @@ extra_compile_args=extra_compile_args), Extension('chython.files._xyz', ['chython/files/_xyz.pyx'], + extra_compile_args=extra_compile_args), + Extension('chython.algorithms._rings', + ['chython/algorithms/_rings.pyx'], extra_compile_args=extra_compile_args) ] diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx new file mode 100644 index 00000000..dfc18e4e --- /dev/null +++ b/chython/algorithms/_rings.pyx @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2025 Ramil Nugmanov +# This file is part of chython. +# +# chython is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, see . +# +from cpython.mem cimport PyMem_Malloc, PyMem_Free +from libc.limits cimport UINT_MAX +from libc.string cimport memset, memcpy + + +cdef extern from "Python.h": + dict _PyDict_NewPresized(Py_ssize_t minused) + + +cdef unsigned int ** alloc_pid(size_t n_nodes, size_t max_paths): + cdef unsigned int **pid = PyMem_Malloc(n_nodes * n_nodes * max_paths * sizeof(unsigned int *)) + memset(pid, 0, n_nodes * n_nodes * max_paths * sizeof(unsigned int *)) + return pid + + +cdef void free_pid(unsigned int **pid, size_t n_nodes, size_t max_paths): + cdef size_t i + for i in range(n_nodes * n_nodes * max_paths): + PyMem_Free(pid[i]) + PyMem_Free(pid) + + +cdef void free_paths(unsigned int **pid, size_t ij, size_t max_paths): + cdef size_t i + + ij *= max_paths + for i in range(ij, ij + max_paths): + if pid[i] == NULL: + break + PyMem_Free(pid[i]) + pid[i] = NULL + + +cdef void move_paths(unsigned int **pid0, unsigned int **pid1, size_t ij, size_t max_paths): + cdef size_t i + + ij *= max_paths + for i in range(ij, ij + max_paths): + # run max_path times to make sure all pid1 paths overridden + PyMem_Free(pid1[i]) + pid1[i] = pid0[i] + pid0[i] = NULL + + +cdef void append_path(unsigned int **pid, size_t ij, size_t max_paths, unsigned int *path): + cdef size_t i + + ij *= max_paths + for i in range(ij, ij + max_paths): + if pid[i] == NULL: + pid[i] = path + return + raise MemoryError('Reached max allowed paths') + + +cdef unsigned int * concatenate_paths(unsigned int **pid, unsigned int *dist, size_t i, size_t j, size_t max_paths): + cdef unsigned int *path + cdef unsigned int size1, size2 + + size1 = dist[i] + size2 = dist[j] + 1 # sizes are edges counts in a path, not nodes, thus, +1 + + path = PyMem_Malloc((size1 + size2) * sizeof(unsigned int)) + + memcpy(path, pid[i * max_paths], size1 * sizeof(unsigned int)) # dropping last node + memcpy(path + size1, pid[j * max_paths], size2 * sizeof(unsigned int)) + return path + + +cdef int has_overlap(unsigned int **pid0, unsigned int **pid1, unsigned int *dist, + size_t i, size_t j, size_t k, size_t max_paths, size_t shift): + cdef size_t n + cdef unsigned int n1, n2 + cdef unsigned int d + cdef unsigned int *path + + n1 = pid0[i * max_paths][1] + n2 = pid0[j * max_paths][dist[j] - 1] + + d = dist[k] - shift + k *= max_paths + for n in range(k, k + max_paths): + path = pid1[n] + if path == NULL: + return 0 + if path[1] == n1 or path[d] == n2: + return 1 + return 0 + + +def make_pids(dict graph, size_t max_paths=10): + cdef size_t n_nodes = len(graph), i, j, k, sk, si, ij, kj, ik + cdef unsigned int d, dki, dkj, dij + cdef object n, m, mb + cdef dict reverse_mapping + cdef unsigned int *path + cdef unsigned int *path1 + cdef unsigned int *path2 + + cdef unsigned int *node_mapping = PyMem_Malloc(n_nodes * sizeof(unsigned int)) + cdef unsigned int *dist = PyMem_Malloc(n_nodes * n_nodes * sizeof(unsigned int)) + cdef unsigned int **pid0 = alloc_pid(n_nodes, max_paths) + cdef unsigned int **pid1 = alloc_pid(n_nodes, max_paths) + + memset(dist, 255, n_nodes * n_nodes * sizeof(unsigned int)) + + reverse_mapping = _PyDict_NewPresized(n_nodes) + for i, n in enumerate(graph): + node_mapping[i] = n + reverse_mapping[n] = i + + for n, mb in graph.items(): + i = reverse_mapping[n] + si = i * n_nodes + for m in mb: + j = reverse_mapping[m] + ij = si + j + dist[ij] = 1 + path = PyMem_Malloc(2 * sizeof(unsigned int)) + path[0] = n + path[1] = m + append_path(pid0, ij, max_paths, path) + + for k in range(n_nodes): + sk = k * n_nodes + for i in range(n_nodes): + if i == k: continue + dki = dist[sk + i] + if dki == UINT_MAX: continue + + si = i * n_nodes + ik = si + k + for j in range(n_nodes): + if j == k or j == i: continue + kj = sk + j + dkj = dist[kj] + if dkj == UINT_MAX: continue + + ij = si + j + dij = dist[ij] + d = dki + dkj + if d < dij: # shorter pid0 path found + dist[ij] = d + + if d == dij - 1 and not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): + move_paths(pid0, pid1, ij, max_paths) + else: # override old paths + free_paths(pid0, ij, max_paths) + free_paths(pid1, ij, max_paths) + + path = concatenate_paths(pid0, dist, ik, kj, max_paths) + append_path(pid0, ij, max_paths, path) + elif d == dij: # new pid0 path + if not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): + path = concatenate_paths(pid0, dist, ik, kj, max_paths) + append_path(pid0, ij, max_paths, path) + elif d == dij + 1: # new pid1 path + if not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): + if not has_overlap(pid0, pid1, dist, ik, kj, ij, max_paths, 0): + path = concatenate_paths(pid0, dist, ik, kj, max_paths) + append_path(pid1, ij, max_paths, path) + + # DEBUG + # for i in range(n_nodes): + # for j in range(n_nodes): + # if i == j: continue + # for k in range((i * n_nodes + j) * max_paths, (i * n_nodes + j + 1) * max_paths): + # path = pid0[k] + # if path != NULL: + # print('!', node_mapping[i], node_mapping[j], [path[d] for d in range(dist[i * n_nodes + j] + 1)]) + # path = pid1[k] + # if path != NULL: + # print('?', node_mapping[i], node_mapping[j], [path[d] for d in range(dist[i * n_nodes + j] + 2)]) + + PyMem_Free(node_mapping) + PyMem_Free(dist) + free_pid(pid0, n_nodes, max_paths) + free_pid(pid1, n_nodes, max_paths) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 37f5cbb7..f7dc58de 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -548,54 +548,4 @@ def _rings_filter(rings, n_sssr): raise ImplementationError('SSSR count not reached') -def _make_pids(graph): - dist = {} - pid0 = {} - pid1 = {} - for n, bs in graph.items(): - dist[n] = dn = {m: 999_999_999 for m in graph} - pid0[n] = pn = {} - pid1[n] = {} - for m in bs: - dn[m] = 1 - pn[m] = [(n, m)] - - for k in graph: - dk = dist[k] - pk = pid0[k] - for i in graph: - if i == k: continue - if (dki := dk[i]) == 999_999_999: continue - di = dist[i] - p0i = pid0[i] - p1i = pid1[i] - p0ik = p0i[k][0][:-1] # pick only one path and drop atom K - - for j in graph: - if j == k or j == i: continue - if (dkj := dk[j]) == 999_999_999: continue - if (dij := di[j]) > (d := dki + dkj): - p = p0ik + pk[j][0] - if dij == d + 1: - pij = p0i[j] - if not any(x[1] == p[1] or x[-2] == p[-2] for x in pij): - p1i[j] = pij # move into pid1 - else: - p1i[j] = [] # init pid1 - di[j] = d - p0i[j] = [p] # override old paths - elif dij == d: - pij = p0i[j] - p = p0ik + pk[j][0] - if any(x[1] == p[1] or x[-2] == p[-2] for x in pij): continue - pij.append(p) - elif dij == d - 1: - p = p0ik + pk[j][0] - if any(x[1] == p[1] or x[-2] == p[-2] for x in p0i[j]): continue - pij = p1i[j] - if any(x[1] == p[1] or x[-2] == p[-2] for x in pij): continue - pij.append(p) - return pid0, pid1 - - __all__ = ['Rings'] From a4fb28326c6c20034d04c9ae3b378c24b982e28b Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 26 Jul 2025 22:35:39 +0200 Subject: [PATCH 03/16] even rings building implemented. --- chython/algorithms/_rings.pyx | 159 ++++++++++++++++++++++++++++++++-- 1 file changed, 154 insertions(+), 5 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index dfc18e4e..b0837972 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -49,6 +49,20 @@ cdef void free_paths(unsigned int **pid, size_t ij, size_t max_paths): pid[i] = NULL +cdef void free_rings(unsigned int **rings, size_t size): + cdef size_t i + for i in range(size): + PyMem_Free(rings[i]) + PyMem_Free(rings) + + +cdef void free_hashes(unsigned long long **hashes, size_t size): + cdef size_t i + for i in range(size): + PyMem_Free(hashes[i]) + PyMem_Free(hashes) + + cdef void move_paths(unsigned int **pid0, unsigned int **pid1, size_t ij, size_t max_paths): cdef size_t i @@ -106,21 +120,110 @@ cdef int has_overlap(unsigned int **pid0, unsigned int **pid1, unsigned int *dis return 0 -def make_pids(dict graph, size_t max_paths=10): - cdef size_t n_nodes = len(graph), i, j, k, sk, si, ij, kj, ik +cdef unsigned int * build_ring(unsigned int *path1, unsigned int *path2, size_t size1, size_t size2): + cdef size_t i + cdef unsigned int *path + + # size 1 and 2 are edges counts. paths have 2 overlapped atoms + path = PyMem_Malloc((size1 + size2) * sizeof(unsigned int)) + + memcpy(path, path1, size1 * sizeof(unsigned int)) # drop last node + + # inverse second path + for i in range(size2, 0, -1): # drop first node + path[size1] = path2[i] + size1 += 1 + return path + + +cdef unsigned long long * build_hash(unsigned int *ring, size_t ring_size, size_t hash_size): + cdef size_t i + cdef unsigned int node + cdef unsigned long long *hash = PyMem_Malloc(hash_size * sizeof(unsigned long long)) + + memset(hash, 0, hash_size * sizeof(unsigned long long)) + + for i in range(ring_size): + node = ring[i] + hash[node // 64] |= (1 << (node % 64)) + return hash + + +cdef size_t get_rank(unsigned int *rings, size_t size, size_t n_rings): + cdef size_t i + for i in range(n_rings): + if size < rings[i]: + return i + return UINT_MAX + + +cdef int compare_rings(unsigned long long *hash1, unsigned long long *ring2, size_t hash_size): + cdef size_t i + for i in range(hash_size): + if hash1[i] ^ ring2[i] != 0: + return 1 # doesn't match + return 0 + + +cdef int check_ring_existence(unsigned long long *ring, unsigned long long **rings, unsigned int *ring_sizes, + size_t ring_size, size_t hash_size, size_t n_rings): + cdef size_t i + cdef unsigned int size + for i in range(n_rings): + size = ring_sizes[i] + if size == ring_size: + if compare_rings(ring, rings[i], hash_size) == 0: + return 0 + elif size > ring_size: + return 1 + return 1 + + +cdef void push_ring(unsigned int **rings, unsigned long long **ring_hashes, unsigned int *ring_sizes, + unsigned int *ring, unsigned long long *hash, unsigned int size, size_t rank, size_t n_rings): + cdef size_t i, i1, n1 + + n1 = n_rings - 1 + PyMem_Free(rings[n1]) + PyMem_Free(ring_hashes[n1]) + + for i in range(n1, rank, -1): + i1 = i - 1 + rings[i] = rings[i1] + ring_hashes[i] = ring_hashes[i1] + ring_sizes[i] = ring_sizes[i1] + + # Insert the new ring at the rank position + rings[rank] = ring + ring_hashes[rank] = hash + ring_sizes[rank] = size + + +def sssr(dict graph, size_t n_rings, size_t max_paths=10): + cdef size_t n_nodes = len(graph), i, j, k, sk, si, ij, kj, ik, size, rank + cdef size_t hash_size = (n_nodes + 63) // 64 cdef unsigned int d, dki, dkj, dij cdef object n, m, mb cdef dict reverse_mapping cdef unsigned int *path cdef unsigned int *path1 cdef unsigned int *path2 + cdef unsigned int *ring + cdef unsigned long long *hash - cdef unsigned int *node_mapping = PyMem_Malloc(n_nodes * sizeof(unsigned int)) + cdef unsigned int *node_mapping = PyMem_Malloc(n_nodes * sizeof(unsigned int)) cdef unsigned int *dist = PyMem_Malloc(n_nodes * n_nodes * sizeof(unsigned int)) cdef unsigned int **pid0 = alloc_pid(n_nodes, max_paths) cdef unsigned int **pid1 = alloc_pid(n_nodes, max_paths) + cdef unsigned int *ring_sizes = PyMem_Malloc(n_rings * sizeof(unsigned int)) + cdef unsigned int **rings = PyMem_Malloc(n_rings * sizeof(unsigned int *)) + cdef unsigned long long **ring_hashes = PyMem_Malloc(n_rings * sizeof(unsigned long long *)) + memset(dist, 255, n_nodes * n_nodes * sizeof(unsigned int)) + memset(ring_sizes, 255, n_rings * sizeof(unsigned int)) + memset(rings, 0, n_rings * sizeof(unsigned int *)) + memset(ring_hashes, 0, n_rings * sizeof(unsigned long long *)) reverse_mapping = _PyDict_NewPresized(n_nodes) for i, n in enumerate(graph): @@ -135,8 +238,8 @@ def make_pids(dict graph, size_t max_paths=10): ij = si + j dist[ij] = 1 path = PyMem_Malloc(2 * sizeof(unsigned int)) - path[0] = n - path[1] = m + path[0] = i + path[1] = j append_path(pid0, ij, max_paths, path) for k in range(n_nodes): @@ -190,7 +293,53 @@ def make_pids(dict graph, size_t max_paths=10): # if path != NULL: # print('?', node_mapping[i], node_mapping[j], [path[d] for d in range(dist[i * n_nodes + j] + 2)]) + for i in range(n_nodes): + si = i * n_nodes + for j in range(n_nodes): + if i == j: continue + ij = si + j + d = dist[ij] + if d == UINT_MAX: continue # different components + + ij *= max_paths + path2 = pid0[ij + 1] + if path2 == NULL: # is odd? + ... + else: # is even + size = 2 * d + rank = get_rank(ring_sizes, size, n_rings) + if rank == UINT_MAX: continue # is not smaller than we have already + + path1 = pid0[ij] + ring = build_ring(path1, path2, d, d) + hash = build_hash(ring, size, hash_size) + if check_ring_existence(hash, ring_hashes, ring_sizes, size, hash_size, n_rings) == 1: + push_ring(rings, ring_hashes, ring_sizes, ring, hash, size, rank, n_rings) + else: + PyMem_Free(ring) + PyMem_Free(hash) + + for k in range(ij + 2, ij + max_paths): + path2 = pid0[k] + if path2 == NULL: break + ring = build_ring(path1, path2, d, d) + hash = build_hash(ring, size, hash_size) + if check_ring_existence(hash, ring_hashes, ring_sizes, size, hash_size, n_rings) == 1: + push_ring(rings, ring_hashes, ring_sizes, ring, hash, size, rank, n_rings) + else: + PyMem_Free(ring) + PyMem_Free(hash) + + # DEBUG + for i in range(n_rings): + ring = rings[i] + if ring == NULL: continue + print(':', [node_mapping[ring[d]] for d in range(ring_sizes[i])]) + PyMem_Free(node_mapping) PyMem_Free(dist) + PyMem_Free(ring_sizes) + free_rings(rings, n_rings) + free_hashes(ring_hashes, n_rings) free_pid(pid0, n_nodes, max_paths) free_pid(pid1, n_nodes, max_paths) From a988dc276ba527f2a6792b0f184d25c423a23350 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sun, 27 Jul 2025 22:50:32 +0200 Subject: [PATCH 04/16] refactored sssr function --- chython/algorithms/_rings.pyx | 184 +++++++++++++++++++++------------- 1 file changed, 112 insertions(+), 72 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index b0837972..92e964a5 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -1,4 +1,11 @@ -# -*- coding: utf-8 -*- +# cython: undeclared_check_usage=error +# cython: warn.undeclared=True +# cython: warn.unused=True +# cython: warn.unused_arg=True +# cython: warn.maybe_uninitialized=True +# cython: boundscheck=False +# cython: wraparound=False + # # Copyright 2025 Ramil Nugmanov # This file is part of chython. @@ -16,6 +23,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # +from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from cpython.mem cimport PyMem_Malloc, PyMem_Free from libc.limits cimport UINT_MAX from libc.string cimport memset, memcpy @@ -31,6 +39,12 @@ cdef unsigned int ** alloc_pid(size_t n_nodes, size_t max_paths): return pid +cdef unsigned int * alloc_dist(size_t n_nodes): + cdef unsigned int *dist = PyMem_Malloc(n_nodes * n_nodes * sizeof(unsigned int)) + memset(dist, 255, n_nodes * n_nodes * sizeof(unsigned int)) + return dist + + cdef void free_pid(unsigned int **pid, size_t n_nodes, size_t max_paths): cdef size_t i for i in range(n_nodes * n_nodes * max_paths): @@ -136,15 +150,18 @@ cdef unsigned int * build_ring(unsigned int *path1, unsigned int *path2, size_t return path -cdef unsigned long long * build_hash(unsigned int *ring, size_t ring_size, size_t hash_size): +cdef unsigned long long * build_hash(unsigned int *path1, unsigned int *path2, size_t size1, size_t size2, size_t hash_size): cdef size_t i cdef unsigned int node cdef unsigned long long *hash = PyMem_Malloc(hash_size * sizeof(unsigned long long)) memset(hash, 0, hash_size * sizeof(unsigned long long)) - for i in range(ring_size): - node = ring[i] + for i in range(size1): # size is edge count; thus, the last node is dropped + node = path1[i] + hash[node // 64] |= (1 << (node % 64)) + for i in range(size2, 0, -1): # 1st node is dropped + node = path2[i] hash[node // 64] |= (1 << (node % 64)) return hash @@ -199,48 +216,32 @@ cdef void push_ring(unsigned int **rings, unsigned long long **ring_hashes, unsi ring_sizes[rank] = size -def sssr(dict graph, size_t n_rings, size_t max_paths=10): - cdef size_t n_nodes = len(graph), i, j, k, sk, si, ij, kj, ik, size, rank - cdef size_t hash_size = (n_nodes + 63) // 64 - cdef unsigned int d, dki, dkj, dij - cdef object n, m, mb - cdef dict reverse_mapping - cdef unsigned int *path - cdef unsigned int *path1 - cdef unsigned int *path2 +cdef void add_ring_if_unique(unsigned int *path1, unsigned int *path2, + unsigned int **rings, unsigned int *ring_sizes, unsigned long long **ring_hashes, + size_t rank, size_t size, size_t size1, size_t size2, size_t hash_size, size_t n_rings): cdef unsigned int *ring - cdef unsigned long long *hash + cdef unsigned long long * hash = build_hash(path1, path2, size1, size2, hash_size) - cdef unsigned int *node_mapping = PyMem_Malloc(n_nodes * sizeof(unsigned int)) - cdef unsigned int *dist = PyMem_Malloc(n_nodes * n_nodes * sizeof(unsigned int)) - cdef unsigned int **pid0 = alloc_pid(n_nodes, max_paths) - cdef unsigned int **pid1 = alloc_pid(n_nodes, max_paths) + if check_ring_existence(hash, ring_hashes, ring_sizes, size, hash_size, n_rings) == 1: + ring = build_ring(path1, path2, size1, size2) + push_ring(rings, ring_hashes, ring_sizes, ring, hash, size, rank, n_rings) + else: + PyMem_Free(hash) - cdef unsigned int *ring_sizes = PyMem_Malloc(n_rings * sizeof(unsigned int)) - cdef unsigned int **rings = PyMem_Malloc(n_rings * sizeof(unsigned int *)) - cdef unsigned long long **ring_hashes = PyMem_Malloc(n_rings * sizeof(unsigned long long *)) - memset(dist, 255, n_nodes * n_nodes * sizeof(unsigned int)) - memset(ring_sizes, 255, n_rings * sizeof(unsigned int)) - memset(rings, 0, n_rings * sizeof(unsigned int *)) - memset(ring_hashes, 0, n_rings * sizeof(unsigned long long *)) +cdef tuple convert_array_to_tuple(unsigned int *array, unsigned int *node_mapping, size_t size): + cdef size_t i + cdef tuple output = PyTuple_New(size) - reverse_mapping = _PyDict_NewPresized(n_nodes) - for i, n in enumerate(graph): - node_mapping[i] = n - reverse_mapping[n] = i + for i in range(size): + PyTuple_SET_ITEM(output, i, node_mapping[array[i]]) + return output - for n, mb in graph.items(): - i = reverse_mapping[n] - si = i * n_nodes - for m in mb: - j = reverse_mapping[m] - ij = si + j - dist[ij] = 1 - path = PyMem_Malloc(2 * sizeof(unsigned int)) - path[0] = i - path[1] = j - append_path(pid0, ij, max_paths, path) + +cdef void build_pid(unsigned int **pid0, unsigned int **pid1, unsigned int *dist, size_t n_nodes, size_t max_paths): + cdef size_t i, j, k, sk, si, ij, kj, ik + cdef unsigned int d, dki, dkj, dij + cdef unsigned int *path for k in range(n_nodes): sk = k * n_nodes @@ -281,17 +282,19 @@ def sssr(dict graph, size_t n_rings, size_t max_paths=10): path = concatenate_paths(pid0, dist, ik, kj, max_paths) append_path(pid1, ij, max_paths, path) - # DEBUG - # for i in range(n_nodes): - # for j in range(n_nodes): - # if i == j: continue - # for k in range((i * n_nodes + j) * max_paths, (i * n_nodes + j + 1) * max_paths): - # path = pid0[k] - # if path != NULL: - # print('!', node_mapping[i], node_mapping[j], [path[d] for d in range(dist[i * n_nodes + j] + 1)]) - # path = pid1[k] - # if path != NULL: - # print('?', node_mapping[i], node_mapping[j], [path[d] for d in range(dist[i * n_nodes + j] + 2)]) + +cdef void find_rings(unsigned int **rings, unsigned int *ring_sizes, + unsigned int **pid0, unsigned int **pid1, unsigned int *dist, + size_t n_nodes, size_t max_paths, size_t hash_size, size_t n_rings): + cdef size_t i, j, k, si, ij, size, rank + cdef unsigned int d, d1 + cdef unsigned int *path1 + cdef unsigned int *path2 + cdef unsigned long long **ring_hashes = PyMem_Malloc(n_rings * sizeof(unsigned long long *)) + + memset(ring_hashes, 0, n_rings * sizeof(unsigned long long *)) + memset(ring_sizes, 255, n_rings * sizeof(unsigned int)) + memset(rings, 0, n_rings * sizeof(unsigned int *)) for i in range(n_nodes): si = i * n_nodes @@ -304,42 +307,79 @@ def sssr(dict graph, size_t n_rings, size_t max_paths=10): ij *= max_paths path2 = pid0[ij + 1] if path2 == NULL: # is odd? - ... + path2 = pid1[ij] + if path2 == NULL: continue # not ring + size = 2 * d + 1 + rank = get_rank(ring_sizes, size, n_rings) + if rank == UINT_MAX: continue + + d1 = d + 1 + path1 = pid0[ij] + add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d1, hash_size, n_rings) + + for k in range(ij + 1, ij + max_paths): + path2 = pid1[k] + if path2 == NULL: break + add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d1, hash_size, n_rings) else: # is even size = 2 * d rank = get_rank(ring_sizes, size, n_rings) if rank == UINT_MAX: continue # is not smaller than we have already path1 = pid0[ij] - ring = build_ring(path1, path2, d, d) - hash = build_hash(ring, size, hash_size) - if check_ring_existence(hash, ring_hashes, ring_sizes, size, hash_size, n_rings) == 1: - push_ring(rings, ring_hashes, ring_sizes, ring, hash, size, rank, n_rings) - else: - PyMem_Free(ring) - PyMem_Free(hash) + add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d, hash_size, n_rings) for k in range(ij + 2, ij + max_paths): path2 = pid0[k] if path2 == NULL: break - ring = build_ring(path1, path2, d, d) - hash = build_hash(ring, size, hash_size) - if check_ring_existence(hash, ring_hashes, ring_sizes, size, hash_size, n_rings) == 1: - push_ring(rings, ring_hashes, ring_sizes, ring, hash, size, rank, n_rings) - else: - PyMem_Free(ring) - PyMem_Free(hash) - - # DEBUG + add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d, hash_size, n_rings) + free_hashes(ring_hashes, n_rings) + + +def sssr(dict graph, size_t n_rings, size_t max_paths=10): + cdef size_t n_nodes = len(graph), i, j, si, ij + cdef size_t hash_size = (n_nodes + 63) // 64 + cdef object n, m, mb + cdef dict reverse_mapping + cdef list output = [] + cdef unsigned int *path + + cdef unsigned int *node_mapping = PyMem_Malloc(n_nodes * sizeof(unsigned int)) + cdef unsigned int *dist = alloc_dist(n_nodes) + cdef unsigned int **pid0 = alloc_pid(n_nodes, max_paths) + cdef unsigned int **pid1 = alloc_pid(n_nodes, max_paths) + + cdef unsigned int *ring_sizes = PyMem_Malloc(n_rings * sizeof(unsigned int)) + cdef unsigned int **rings = PyMem_Malloc(n_rings * sizeof(unsigned int *)) + + reverse_mapping = _PyDict_NewPresized(n_nodes) + for i, n in enumerate(graph): + node_mapping[i] = n + reverse_mapping[n] = i + + for n, mb in graph.items(): + i = reverse_mapping[n] + si = i * n_nodes + for m in mb: + j = reverse_mapping[m] + ij = si + j + dist[ij] = 1 + path = PyMem_Malloc(2 * sizeof(unsigned int)) + path[0] = i + path[1] = j + append_path(pid0, ij, max_paths, path) + + build_pid(pid0, pid1, dist, n_nodes, max_paths) + find_rings(rings, ring_sizes, pid0, pid1, dist, n_nodes, max_paths, hash_size, n_rings) + for i in range(n_rings): - ring = rings[i] - if ring == NULL: continue - print(':', [node_mapping[ring[d]] for d in range(ring_sizes[i])]) + output.append(convert_array_to_tuple(rings[i], node_mapping, ring_sizes[i])) PyMem_Free(node_mapping) PyMem_Free(dist) PyMem_Free(ring_sizes) free_rings(rings, n_rings) - free_hashes(ring_hashes, n_rings) free_pid(pid0, n_nodes, max_paths) free_pid(pid1, n_nodes, max_paths) + + return output From 6c5d3677cc0cd6ac69f86adcfc277de412b772f8 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 8 Aug 2025 19:39:15 +0200 Subject: [PATCH 05/16] saved --- chython/algorithms/_rings.pyx | 228 ++++++++++++++++++---------------- 1 file changed, 124 insertions(+), 104 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 92e964a5..99f775ba 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -24,8 +24,8 @@ # along with this program; if not, see . # from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM -from cpython.mem cimport PyMem_Malloc, PyMem_Free -from libc.limits cimport UINT_MAX +from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc +from libc.limits cimport UINT_MAX, USHRT_MAX from libc.string cimport memset, memcpy @@ -33,41 +33,87 @@ cdef extern from "Python.h": dict _PyDict_NewPresized(Py_ssize_t minused) -cdef unsigned int ** alloc_pid(size_t n_nodes, size_t max_paths): - cdef unsigned int **pid = PyMem_Malloc(n_nodes * n_nodes * max_paths * sizeof(unsigned int *)) - memset(pid, 0, n_nodes * n_nodes * max_paths * sizeof(unsigned int *)) - return pid +cdef struct paths_t: + unsigned short distance + unsigned char num_pid0 + unsigned char num_pid1 + unsigned short **pid0 + unsigned short **pid1 -cdef unsigned int * alloc_dist(size_t n_nodes): - cdef unsigned int *dist = PyMem_Malloc(n_nodes * n_nodes * sizeof(unsigned int)) - memset(dist, 255, n_nodes * n_nodes * sizeof(unsigned int)) - return dist +cdef struct dist_matrix_t: + unsigned short n_nodes + unsigned short *mapping + paths_t *data -cdef void free_pid(unsigned int **pid, size_t n_nodes, size_t max_paths): +cdef dist_matrix_t *alloc_dist_matrix(unsigned short n_nodes): + cdef size_t i, total = n_nodes * n_nodes + cdef dist_matrix_t *matrix = PyMem_Malloc(sizeof(dist_matrix_t)) + + matrix.n_nodes = n_nodes + matrix.mapping = PyMem_Malloc(n_nodes * sizeof(unsigned short)) + matrix.data = PyMem_Malloc(total * sizeof(paths_t)) + # set in paths_t all attrs to 0/NULL + memset(matrix.data, 0, total * sizeof(paths_t)) + + # reset distances to max + for i in range(total): + matrix.data[i].distance = USHRT_MAX + return matrix + + +cdef void append_pid0(paths_t *paths, unsigned short *path): + cdef size_t i = paths.num_pid0 + paths.num_pid0 += 1 + PyMem_Realloc(paths.pid0, paths.num_pid0 * sizeof(unsigned short *)) + paths.pid0[i] = path + + +cdef void append_pid1(paths_t *paths, unsigned short *path): + cdef size_t i = paths.num_pid1 + paths.num_pid1 += 1 + PyMem_Realloc(paths.pid1, paths.num_pid1 * sizeof(unsigned short *)) + paths.pid1[i] = path + + +cdef void clear_pid0(paths_t *paths): cdef size_t i - for i in range(n_nodes * n_nodes * max_paths): - PyMem_Free(pid[i]) - PyMem_Free(pid) + for i in range(paths.num_pid0): + PyMem_Free(paths.pid0[i]) + PyMem_Free(paths.pid0) + paths.num_pid0 = 0 -cdef void free_paths(unsigned int **pid, size_t ij, size_t max_paths): +cdef void clear_pid1(paths_t *paths): cdef size_t i + for i in range(paths.num_pid1): + PyMem_Free(paths.pid1[i]) + PyMem_Free(paths.pid1) + paths.num_pid1 = 0 - ij *= max_paths - for i in range(ij, ij + max_paths): - if pid[i] == NULL: - break - PyMem_Free(pid[i]) - pid[i] = NULL + +cdef void clear_pids(paths_t *paths): + clear_pid0(paths) + clear_pid1(paths) -cdef void free_rings(unsigned int **rings, size_t size): +cdef void free_dist_matrix(dist_matrix_t *matrix): + cdef size_t i, total = matrix.n_nodes * matrix.n_nodes + for i in range(total): + clear_pids(&matrix.data[i]) + PyMem_Free(matrix.mapping) + PyMem_Free(matrix.data) + PyMem_Free(matrix) + + +cdef tuple array_to_tuple(unsigned short *array, unsigned short *mapping, size_t size): cdef size_t i + cdef tuple output = PyTuple_New(size) + for i in range(size): - PyMem_Free(rings[i]) - PyMem_Free(rings) + PyTuple_SET_ITEM(output, i, mapping[array[i]]) + return output cdef void free_hashes(unsigned long long **hashes, size_t size): @@ -88,17 +134,6 @@ cdef void move_paths(unsigned int **pid0, unsigned int **pid1, size_t ij, size_t pid0[i] = NULL -cdef void append_path(unsigned int **pid, size_t ij, size_t max_paths, unsigned int *path): - cdef size_t i - - ij *= max_paths - for i in range(ij, ij + max_paths): - if pid[i] == NULL: - pid[i] = path - return - raise MemoryError('Reached max allowed paths') - - cdef unsigned int * concatenate_paths(unsigned int **pid, unsigned int *dist, size_t i, size_t j, size_t max_paths): cdef unsigned int *path cdef unsigned int size1, size2 @@ -229,54 +264,45 @@ cdef void add_ring_if_unique(unsigned int *path1, unsigned int *path2, PyMem_Free(hash) -cdef tuple convert_array_to_tuple(unsigned int *array, unsigned int *node_mapping, size_t size): - cdef size_t i - cdef tuple output = PyTuple_New(size) - - for i in range(size): - PyTuple_SET_ITEM(output, i, node_mapping[array[i]]) - return output - - -cdef void build_pid(unsigned int **pid0, unsigned int **pid1, unsigned int *dist, size_t n_nodes, size_t max_paths): +cdef void build_pid(dist_matrix_t *matrix): cdef size_t i, j, k, sk, si, ij, kj, ik - cdef unsigned int d, dki, dkj, dij + cdef unsigned int d + cdef paths_t *paths_ki + cdef paths_t *paths_kj + cdef paths_t *paths_ij cdef unsigned int *path - for k in range(n_nodes): - sk = k * n_nodes - for i in range(n_nodes): + for k in range(matrix.n_nodes): + sk = k * matrix.n_nodes + for i in range(matrix.n_nodes): if i == k: continue - dki = dist[sk + i] - if dki == UINT_MAX: continue - - si = i * n_nodes + paths_ki = &matrix.data[sk + i] + if paths_ki.distance == USHRT_MAX: continue + si = i * matrix.n_nodes ik = si + k - for j in range(n_nodes): + for j in range(matrix.n_nodes): if j == k or j == i: continue kj = sk + j - dkj = dist[kj] - if dkj == UINT_MAX: continue + paths_kj = &matrix.data[kj] + if paths_kj.distance == UINT_MAX: continue ij = si + j - dij = dist[ij] - d = dki + dkj - if d < dij: # shorter pid0 path found - dist[ij] = d - - if d == dij - 1 and not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): + paths_ij = &matrix.data[ij] + d = paths_ki.distance + paths_kj.distance + if d < paths_ij.distance: # shorter pid0 path found + if d == paths_ij.distance - 1 and not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): move_paths(pid0, pid1, ij, max_paths) else: # override old paths - free_paths(pid0, ij, max_paths) - free_paths(pid1, ij, max_paths) + clear_pids(paths_ij) + paths_ij.distance = d - path = concatenate_paths(pid0, dist, ik, kj, max_paths) + path = concatenate_paths(pid0, dist, ik, kj) append_path(pid0, ij, max_paths, path) - elif d == dij: # new pid0 path + elif d == paths_ij.distance: # new pid0 path if not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): path = concatenate_paths(pid0, dist, ik, kj, max_paths) append_path(pid0, ij, max_paths, path) - elif d == dij + 1: # new pid1 path + elif d == paths_ij.distance + 1: # new pid1 path if not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): if not has_overlap(pid0, pid1, dist, ik, kj, ij, max_paths, 0): path = concatenate_paths(pid0, dist, ik, kj, max_paths) @@ -336,50 +362,44 @@ cdef void find_rings(unsigned int **rings, unsigned int *ring_sizes, free_hashes(ring_hashes, n_rings) -def sssr(dict graph, size_t n_rings, size_t max_paths=10): - cdef size_t n_nodes = len(graph), i, j, si, ij - cdef size_t hash_size = (n_nodes + 63) // 64 - cdef object n, m, mb - cdef dict reverse_mapping +def sssr(dict graph, size_t n_rings): + cdef size_t si + cdef unsigned short i, n, m, n_nodes = len(graph) + cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb + cdef object mb cdef list output = [] - cdef unsigned int *path - cdef unsigned int *node_mapping = PyMem_Malloc(n_nodes * sizeof(unsigned int)) - cdef unsigned int *dist = alloc_dist(n_nodes) - cdef unsigned int **pid0 = alloc_pid(n_nodes, max_paths) - cdef unsigned int **pid1 = alloc_pid(n_nodes, max_paths) + cdef dist_matrix_t *matrix = alloc_dist_matrix(n_nodes) - cdef unsigned int *ring_sizes = PyMem_Malloc(n_rings * sizeof(unsigned int)) - cdef unsigned int **rings = PyMem_Malloc(n_rings * sizeof(unsigned int *)) - - reverse_mapping = _PyDict_NewPresized(n_nodes) for i, n in enumerate(graph): - node_mapping[i] = n + matrix.mapping[i] = n reverse_mapping[n] = i for n, mb in graph.items(): - i = reverse_mapping[n] - si = i * n_nodes + si = n_nodes * reverse_mapping[n] for m in mb: - j = reverse_mapping[m] - ij = si + j - dist[ij] = 1 - path = PyMem_Malloc(2 * sizeof(unsigned int)) - path[0] = i - path[1] = j - append_path(pid0, ij, max_paths, path) - - build_pid(pid0, pid1, dist, n_nodes, max_paths) - find_rings(rings, ring_sizes, pid0, pid1, dist, n_nodes, max_paths, hash_size, n_rings) - - for i in range(n_rings): - output.append(convert_array_to_tuple(rings[i], node_mapping, ring_sizes[i])) - - PyMem_Free(node_mapping) - PyMem_Free(dist) - PyMem_Free(ring_sizes) - free_rings(rings, n_rings) - free_pid(pid0, n_nodes, max_paths) - free_pid(pid1, n_nodes, max_paths) + matrix.data[si + reverse_mapping[m]].distance = 1 + # build_pid(matrix) + # + # DEBUG + cdef paths_t p + for i in range(n_nodes): + for j in range(n_nodes): + if i == j: continue + p = matrix.data[i * n_nodes + j] + if p.distance == 1: + print('!', matrix.mapping[i], matrix.mapping[j], ()) + for k in range(p.num_pid0): + print('!', matrix.mapping[i], matrix.mapping[j], array_to_tuple(p.pid0[k], matrix.mapping, p.distance - 1)) + for k in range(p.num_pid1): + print('!', matrix.mapping[i], matrix.mapping[j], array_to_tuple(p.pid1[k], matrix.mapping, p.distance)) + + # + # find_rings(rings, ring_sizes, pid0, pid1, dist, n_nodes, max_paths, hash_size, n_rings) + # + # for i in range(n_rings): + # output.append(convert_array_to_tuple(rings[i], node_mapping, ring_sizes[i])) + # + free_dist_matrix(matrix) return output From 095810af73d18038e45fa42304dc3b584c218323 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sun, 17 Aug 2025 14:31:43 +0200 Subject: [PATCH 06/16] fixed make pid basic logic --- chython/algorithms/_rings.pyx | 184 ++++++++++++++++------------------ 1 file changed, 87 insertions(+), 97 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 99f775ba..2ddc2d23 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -23,27 +23,24 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, see . # -from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc from libc.limits cimport UINT_MAX, USHRT_MAX from libc.string cimport memset, memcpy -cdef extern from "Python.h": - dict _PyDict_NewPresized(Py_ssize_t minused) - - cdef struct paths_t: - unsigned short distance - unsigned char num_pid0 - unsigned char num_pid1 - unsigned short **pid0 - unsigned short **pid1 + # store PID units. paths store only non-terminal nodes + # in the case of distances equals 1 - don't store any paths + unsigned short distance # bonds count on the shortest path between nodes + unsigned char num_pid0 # shortest paths count + unsigned char num_pid1 # shortest+1 paths count + unsigned short **pid0 # array of pointers to the shortest paths arrays + unsigned short **pid1 # array of pointers to the shortest+1 paths arrays cdef struct dist_matrix_t: unsigned short n_nodes - unsigned short *mapping + unsigned short *mapping # idx to node label mapping paths_t *data @@ -63,34 +60,24 @@ cdef dist_matrix_t *alloc_dist_matrix(unsigned short n_nodes): return matrix -cdef void append_pid0(paths_t *paths, unsigned short *path): - cdef size_t i = paths.num_pid0 - paths.num_pid0 += 1 - PyMem_Realloc(paths.pid0, paths.num_pid0 * sizeof(unsigned short *)) - paths.pid0[i] = path - - -cdef void append_pid1(paths_t *paths, unsigned short *path): - cdef size_t i = paths.num_pid1 - paths.num_pid1 += 1 - PyMem_Realloc(paths.pid1, paths.num_pid1 * sizeof(unsigned short *)) - paths.pid1[i] = path - - cdef void clear_pid0(paths_t *paths): cdef size_t i + if paths.num_pid0 == 0: return for i in range(paths.num_pid0): PyMem_Free(paths.pid0[i]) PyMem_Free(paths.pid0) paths.num_pid0 = 0 + paths.pid0 = NULL cdef void clear_pid1(paths_t *paths): cdef size_t i + if paths.num_pid1 == 0: return for i in range(paths.num_pid1): PyMem_Free(paths.pid1[i]) PyMem_Free(paths.pid1) paths.num_pid1 = 0 + paths.pid1 = NULL cdef void clear_pids(paths_t *paths): @@ -107,65 +94,75 @@ cdef void free_dist_matrix(dist_matrix_t *matrix): PyMem_Free(matrix) -cdef tuple array_to_tuple(unsigned short *array, unsigned short *mapping, size_t size): - cdef size_t i - cdef tuple output = PyTuple_New(size) +cdef void move_paths(paths_t *paths): + clear_pid1(paths) - for i in range(size): - PyTuple_SET_ITEM(output, i, mapping[array[i]]) - return output + paths.num_pid1 = paths.num_pid0 + paths.pid1 = paths.pid0 + paths.num_pid0 = 0 + paths.pid0 = NULL -cdef void free_hashes(unsigned long long **hashes, size_t size): - cdef size_t i - for i in range(size): - PyMem_Free(hashes[i]) - PyMem_Free(hashes) +cdef unsigned short *concatenate_paths(paths_t *paths_ik, paths_t *paths_kj, unsigned short k): + cdef unsigned short *path + cdef size_t size1, size2 + path = PyMem_Malloc((paths_ik.distance + paths_kj.distance - 1) * sizeof(unsigned short)) -cdef void move_paths(unsigned int **pid0, unsigned int **pid1, size_t ij, size_t max_paths): - cdef size_t i + size1 = paths_ik.distance - 1 + size2 = paths_kj.distance - 1 + path[size1] = k # put node k to form continuous i-k-j path excluding i,j terminals + if size1: + memcpy(path, paths_ik.pid0[0], size1 * sizeof(unsigned short)) # copy nodes between i-k + if size2: + memcpy(path + paths_ik.distance, paths_kj.pid0[0], size2 * sizeof(unsigned short)) + return path - ij *= max_paths - for i in range(ij, ij + max_paths): - # run max_path times to make sure all pid1 paths overridden - PyMem_Free(pid1[i]) - pid1[i] = pid0[i] - pid0[i] = NULL +cdef void append_pid0(paths_t *paths_ij, paths_t *paths_ik, paths_t *paths_kj, unsigned short k): + cdef size_t i = paths_ij.num_pid0 + paths_ij.num_pid0 += 1 + paths_ij.pid0 = PyMem_Realloc(paths_ij.pid0, paths_ij.num_pid0 * sizeof(unsigned short *)) + paths_ij.pid0[i] = concatenate_paths(paths_ik, paths_kj, k) -cdef unsigned int * concatenate_paths(unsigned int **pid, unsigned int *dist, size_t i, size_t j, size_t max_paths): - cdef unsigned int *path - cdef unsigned int size1, size2 - size1 = dist[i] - size2 = dist[j] + 1 # sizes are edges counts in a path, not nodes, thus, +1 +cdef void append_pid1(paths_t *paths_ij, paths_t *paths_ik, paths_t *paths_kj, unsigned short k): + cdef size_t i = paths_ij.num_pid1 + paths_ij.num_pid1 += 1 + paths_ij.pid1 = PyMem_Realloc(paths_ij.pid1, paths_ij.num_pid1 * sizeof(unsigned short *)) + paths_ij.pid1[i] = concatenate_paths(paths_ik, paths_kj, k) - path = PyMem_Malloc((size1 + size2) * sizeof(unsigned int)) - memcpy(path, pid[i * max_paths], size1 * sizeof(unsigned int)) # dropping last node - memcpy(path + size1, pid[j * max_paths], size2 * sizeof(unsigned int)) - return path +cdef tuple ring_to_tuple(unsigned short *ring, size_t size): + cdef size_t i + return tuple(ring[i] for i in range(size)) -cdef int has_overlap(unsigned int **pid0, unsigned int **pid1, unsigned int *dist, - size_t i, size_t j, size_t k, size_t max_paths, size_t shift): - cdef size_t n - cdef unsigned int n1, n2 - cdef unsigned int d - cdef unsigned int *path +cdef void free_hashes(unsigned long long **hashes, size_t size): + cdef size_t i + for i in range(size): + PyMem_Free(hashes[i]) + PyMem_Free(hashes) + + +cdef int has_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, int test_pid1): + cdef size_t i, j + cdef unsigned short n1, n2 + cdef unsigned short *path - n1 = pid0[i * max_paths][1] - n2 = pid0[j * max_paths][dist[j] - 1] + n1 = paths_ik.pid0[0][1] + n2 = paths_kj.pid0[0][paths_kj.distance - 1] - d = dist[k] - shift - k *= max_paths - for n in range(k, k + max_paths): - path = pid1[n] - if path == NULL: - return 0 - if path[1] == n1 or path[d] == n2: + j = paths_ij.distance - 1 + for i in range(paths_ij.num_pid0): + path = paths_ij.pid0[i] + if path[1] == n1 or path[j] == n2: return 1 + if test_pid1: + for i in range(paths_ij.num_pid1): + path = paths_ij.pid1[i] + if path[1] == n1 or path[paths_ij.distance] == n2: + return 1 return 0 @@ -265,48 +262,41 @@ cdef void add_ring_if_unique(unsigned int *path1, unsigned int *path2, cdef void build_pid(dist_matrix_t *matrix): - cdef size_t i, j, k, sk, si, ij, kj, ik + cdef size_t i, j, k, sk, si cdef unsigned int d - cdef paths_t *paths_ki cdef paths_t *paths_kj cdef paths_t *paths_ij - cdef unsigned int *path + cdef paths_t *paths_ik for k in range(matrix.n_nodes): sk = k * matrix.n_nodes for i in range(matrix.n_nodes): if i == k: continue - paths_ki = &matrix.data[sk + i] - if paths_ki.distance == USHRT_MAX: continue si = i * matrix.n_nodes - ik = si + k + paths_ik = &matrix.data[si + k] + if paths_ik.distance == USHRT_MAX: continue + for j in range(matrix.n_nodes): if j == k or j == i: continue - kj = sk + j - paths_kj = &matrix.data[kj] - if paths_kj.distance == UINT_MAX: continue + paths_kj = &matrix.data[sk + j] + if paths_kj.distance == USHRT_MAX: continue - ij = si + j - paths_ij = &matrix.data[ij] - d = paths_ki.distance + paths_kj.distance + paths_ij = &matrix.data[si + j] + d = paths_ik.distance + paths_kj.distance if d < paths_ij.distance: # shorter pid0 path found - if d == paths_ij.distance - 1 and not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): - move_paths(pid0, pid1, ij, max_paths) - else: # override old paths + if d == paths_ij.distance - 1: # and not has_overlap(paths_ik, paths_kj, paths_ij, 0): + move_paths(paths_ij) + else: # drop old paths clear_pids(paths_ij) - paths_ij.distance = d - path = concatenate_paths(pid0, dist, ik, kj) - append_path(pid0, ij, max_paths, path) + paths_ij.distance = d + append_pid0(paths_ij, paths_ik, paths_kj, matrix.mapping[k]) elif d == paths_ij.distance: # new pid0 path - if not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): - path = concatenate_paths(pid0, dist, ik, kj, max_paths) - append_path(pid0, ij, max_paths, path) + #if not has_overlap(paths_ik, paths_kj, paths_ij, 0): + append_pid0(paths_ij, paths_ik, paths_kj, matrix.mapping[k]) elif d == paths_ij.distance + 1: # new pid1 path - if not has_overlap(pid0, pid0, dist, ik, kj, ij, max_paths, 1): - if not has_overlap(pid0, pid1, dist, ik, kj, ij, max_paths, 0): - path = concatenate_paths(pid0, dist, ik, kj, max_paths) - append_path(pid1, ij, max_paths, path) + #if not has_overlap(paths_ik, paths_kj, paths_ij, 1): + append_pid1(paths_ij, paths_ik, paths_kj, matrix.mapping[k]) cdef void find_rings(unsigned int **rings, unsigned int *ring_sizes, @@ -380,7 +370,7 @@ def sssr(dict graph, size_t n_rings): for m in mb: matrix.data[si + reverse_mapping[m]].distance = 1 - # build_pid(matrix) + build_pid(matrix) # # DEBUG cdef paths_t p @@ -389,11 +379,11 @@ def sssr(dict graph, size_t n_rings): if i == j: continue p = matrix.data[i * n_nodes + j] if p.distance == 1: - print('!', matrix.mapping[i], matrix.mapping[j], ()) + print('!', matrix.mapping[i], matrix.mapping[j]) for k in range(p.num_pid0): - print('!', matrix.mapping[i], matrix.mapping[j], array_to_tuple(p.pid0[k], matrix.mapping, p.distance - 1)) + print('?', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid0[k], p.distance - 1)) for k in range(p.num_pid1): - print('!', matrix.mapping[i], matrix.mapping[j], array_to_tuple(p.pid1[k], matrix.mapping, p.distance)) + print('$', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid1[k], p.distance)) # # find_rings(rings, ring_sizes, pid0, pid1, dist, n_nodes, max_paths, hash_size, n_rings) From e2c836db9ecea578c1bb559e350e0e17e3b887a1 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sun, 17 Aug 2025 15:03:17 +0200 Subject: [PATCH 07/16] build pid implemented --- chython/algorithms/_rings.pyx | 39 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 2ddc2d23..8d0eb9b4 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -145,25 +145,32 @@ cdef void free_hashes(unsigned long long **hashes, size_t size): PyMem_Free(hashes) -cdef int has_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, int test_pid1): +cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, unsigned short k, int test_pid1): cdef size_t i, j cdef unsigned short n1, n2 cdef unsigned short *path - n1 = paths_ik.pid0[0][1] - n2 = paths_kj.pid0[0][paths_kj.distance - 1] + if paths_ik.distance == 1: + n1 = k + else: + n1 = paths_ik.pid0[0][0] + if paths_kj.distance == 1: + n2 = k + else: + n2 = paths_kj.pid0[0][paths_kj.distance - 2] - j = paths_ij.distance - 1 + j = paths_ij.distance - 2 for i in range(paths_ij.num_pid0): path = paths_ij.pid0[i] - if path[1] == n1 or path[j] == n2: - return 1 + if path[0] == n1 or path[j] == n2: + return 0 if test_pid1: + j = paths_ij.distance - 1 for i in range(paths_ij.num_pid1): path = paths_ij.pid1[i] - if path[1] == n1 or path[paths_ij.distance] == n2: - return 1 - return 0 + if path[0] == n1 or path[j] == n2: + return 0 + return 1 cdef unsigned int * build_ring(unsigned int *path1, unsigned int *path2, size_t size1, size_t size2): @@ -263,6 +270,7 @@ cdef void add_ring_if_unique(unsigned int *path1, unsigned int *path2, cdef void build_pid(dist_matrix_t *matrix): cdef size_t i, j, k, sk, si + cdef unsigned short rk cdef unsigned int d cdef paths_t *paths_kj cdef paths_t *paths_ij @@ -270,6 +278,7 @@ cdef void build_pid(dist_matrix_t *matrix): for k in range(matrix.n_nodes): sk = k * matrix.n_nodes + rk = matrix.mapping[k] for i in range(matrix.n_nodes): if i == k: continue si = i * matrix.n_nodes @@ -284,19 +293,19 @@ cdef void build_pid(dist_matrix_t *matrix): paths_ij = &matrix.data[si + j] d = paths_ik.distance + paths_kj.distance if d < paths_ij.distance: # shorter pid0 path found - if d == paths_ij.distance - 1: # and not has_overlap(paths_ik, paths_kj, paths_ij, 0): + if d == paths_ij.distance - 1 and has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 0): move_paths(paths_ij) else: # drop old paths clear_pids(paths_ij) paths_ij.distance = d - append_pid0(paths_ij, paths_ik, paths_kj, matrix.mapping[k]) + append_pid0(paths_ij, paths_ik, paths_kj, rk) elif d == paths_ij.distance: # new pid0 path - #if not has_overlap(paths_ik, paths_kj, paths_ij, 0): - append_pid0(paths_ij, paths_ik, paths_kj, matrix.mapping[k]) + if has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 0): + append_pid0(paths_ij, paths_ik, paths_kj, rk) elif d == paths_ij.distance + 1: # new pid1 path - #if not has_overlap(paths_ik, paths_kj, paths_ij, 1): - append_pid1(paths_ij, paths_ik, paths_kj, matrix.mapping[k]) + if has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 1): + append_pid1(paths_ij, paths_ik, paths_kj, rk) cdef void find_rings(unsigned int **rings, unsigned int *ring_sizes, From 2c7fd0b2e436e0886f4df0c194abefd269224ba9 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sun, 17 Aug 2025 23:45:18 +0200 Subject: [PATCH 08/16] ring building mail logic implemented --- chython/algorithms/_rings.pyx | 348 +++++++++++++++++++--------------- 1 file changed, 193 insertions(+), 155 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 8d0eb9b4..cadfc678 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -24,10 +24,14 @@ # along with this program; if not, see . # from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc -from libc.limits cimport UINT_MAX, USHRT_MAX +from libc.limits cimport USHRT_MAX from libc.string cimport memset, memcpy +cdef enum: + ULL_BITS = sizeof(unsigned long long) * 8 + + cdef struct paths_t: # store PID units. paths store only non-terminal nodes # in the case of distances equals 1 - don't store any paths @@ -44,6 +48,19 @@ cdef struct dist_matrix_t: paths_t *data +cdef struct ring_t: + unsigned short n_nodes + unsigned short *nodes + unsigned long long *hash + + +cdef struct rings_t: + unsigned short n_rings + unsigned short n_allocated + unsigned short hash_size + ring_t *rings + + cdef dist_matrix_t *alloc_dist_matrix(unsigned short n_nodes): cdef size_t i, total = n_nodes * n_nodes cdef dist_matrix_t *matrix = PyMem_Malloc(sizeof(dist_matrix_t)) @@ -60,7 +77,22 @@ cdef dist_matrix_t *alloc_dist_matrix(unsigned short n_nodes): return matrix -cdef void clear_pid0(paths_t *paths): +cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): + cdef size_t i + cdef rings_t *rings = PyMem_Malloc(sizeof(rings_t)) + + rings.n_rings = n_rings + rings.n_allocated = 0 + rings.hash_size = hash_size + rings.rings = PyMem_Malloc(n_rings * sizeof(ring_t)) + memset(rings.rings, 0, n_rings * sizeof(ring_t)) + + for i in range(n_rings): + rings.rings[i].n_nodes = USHRT_MAX + return rings + + +cdef void free_pid0(paths_t *paths): cdef size_t i if paths.num_pid0 == 0: return for i in range(paths.num_pid0): @@ -70,7 +102,7 @@ cdef void clear_pid0(paths_t *paths): paths.pid0 = NULL -cdef void clear_pid1(paths_t *paths): +cdef void free_pid1(paths_t *paths): cdef size_t i if paths.num_pid1 == 0: return for i in range(paths.num_pid1): @@ -80,22 +112,37 @@ cdef void clear_pid1(paths_t *paths): paths.pid1 = NULL -cdef void clear_pids(paths_t *paths): - clear_pid0(paths) - clear_pid1(paths) +cdef void free_pids(paths_t *paths): + free_pid0(paths) + free_pid1(paths) cdef void free_dist_matrix(dist_matrix_t *matrix): cdef size_t i, total = matrix.n_nodes * matrix.n_nodes for i in range(total): - clear_pids(&matrix.data[i]) + free_pids(&matrix.data[i]) PyMem_Free(matrix.mapping) PyMem_Free(matrix.data) PyMem_Free(matrix) +cdef void free_ring(ring_t *ring): + PyMem_Free(ring.hash) + PyMem_Free(ring.nodes) + ring.hash = NULL + ring.nodes = NULL + + +cdef void free_rings(rings_t *rings): + cdef size_t i + for i in range(rings.n_rings): + free_ring(&rings.rings[i]) + PyMem_Free(rings.rings) + PyMem_Free(rings) + + cdef void move_paths(paths_t *paths): - clear_pid1(paths) + free_pid1(paths) paths.num_pid1 = paths.num_pid0 paths.pid1 = paths.pid0 @@ -138,13 +185,6 @@ cdef tuple ring_to_tuple(unsigned short *ring, size_t size): return tuple(ring[i] for i in range(size)) -cdef void free_hashes(unsigned long long **hashes, size_t size): - cdef size_t i - for i in range(size): - PyMem_Free(hashes[i]) - PyMem_Free(hashes) - - cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, unsigned short k, int test_pid1): cdef size_t i, j cdef unsigned short n1, n2 @@ -173,99 +213,84 @@ cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij return 1 -cdef unsigned int * build_ring(unsigned int *path1, unsigned int *path2, size_t size1, size_t size2): - cdef size_t i - cdef unsigned int *path - - # size 1 and 2 are edges counts. paths have 2 overlapped atoms - path = PyMem_Malloc((size1 + size2) * sizeof(unsigned int)) - - memcpy(path, path1, size1 * sizeof(unsigned int)) # drop last node - - # inverse second path - for i in range(size2, 0, -1): # drop first node - path[size1] = path2[i] - size1 += 1 - return path - - -cdef unsigned long long * build_hash(unsigned int *path1, unsigned int *path2, size_t size1, size_t size2, size_t hash_size): - cdef size_t i +cdef unsigned long long *build_hash(unsigned short *path1, unsigned short *path2, + unsigned short i, unsigned short j, size_t size1, size_t size2, size_t hash_size): + cdef size_t k cdef unsigned int node cdef unsigned long long *hash = PyMem_Malloc(hash_size * sizeof(unsigned long long)) memset(hash, 0, hash_size * sizeof(unsigned long long)) - - for i in range(size1): # size is edge count; thus, the last node is dropped - node = path1[i] - hash[node // 64] |= (1 << (node % 64)) - for i in range(size2, 0, -1): # 1st node is dropped - node = path2[i] - hash[node // 64] |= (1 << (node % 64)) + hash[i // ULL_BITS] |= (1 << (i % ULL_BITS)) # add linker node + hash[j // ULL_BITS] |= (1 << (j % ULL_BITS)) + + for k in range(size1): + node = path1[k] + hash[node // ULL_BITS] |= (1 << (node % ULL_BITS)) + for k in range(size2): + node = path2[k] + hash[node // ULL_BITS] |= (1 << (node % ULL_BITS)) return hash -cdef size_t get_rank(unsigned int *rings, size_t size, size_t n_rings): +cdef size_t get_rank(rings_t *rings, size_t size): cdef size_t i - for i in range(n_rings): - if size < rings[i]: + for i in range(rings.n_rings): + if size < rings.rings[i].n_nodes: return i - return UINT_MAX + return USHRT_MAX -cdef int compare_rings(unsigned long long *hash1, unsigned long long *ring2, size_t hash_size): - cdef size_t i - for i in range(hash_size): - if hash1[i] ^ ring2[i] != 0: - return 1 # doesn't match - return 0 - - -cdef int check_ring_existence(unsigned long long *ring, unsigned long long **rings, unsigned int *ring_sizes, - size_t ring_size, size_t hash_size, size_t n_rings): - cdef size_t i - cdef unsigned int size - for i in range(n_rings): - size = ring_sizes[i] - if size == ring_size: - if compare_rings(ring, rings[i], hash_size) == 0: +cdef int is_unique(ring_t *ring, rings_t *rings): + cdef size_t i, j + cdef int hash_match + cdef ring_t *other + + for i in range(rings.n_rings): + other = &rings.rings[i] + if ring.n_nodes == other.n_nodes: + hash_match = 1 + for j in range(rings.hash_size): + if ring.hash[j] ^ other.hash[j] != 0: + hash_match = 0 + break + if hash_match: return 0 - elif size > ring_size: + elif ring.n_nodes < other.n_nodes: return 1 - return 1 - + return 0 -cdef void push_ring(unsigned int **rings, unsigned long long **ring_hashes, unsigned int *ring_sizes, - unsigned int *ring, unsigned long long *hash, unsigned int size, size_t rank, size_t n_rings): - cdef size_t i, i1, n1 - n1 = n_rings - 1 - PyMem_Free(rings[n1]) - PyMem_Free(ring_hashes[n1]) +cdef unsigned short *build_ring(unsigned short *path1, unsigned short *path2, + unsigned short i, unsigned short j, size_t size1, size_t size2): + cdef size_t k, s = size1 + 1 + cdef unsigned short *nodes - for i in range(n1, rank, -1): - i1 = i - 1 - rings[i] = rings[i1] - ring_hashes[i] = ring_hashes[i1] - ring_sizes[i] = ring_sizes[i1] + nodes = PyMem_Malloc((size1 + size2 + 2) * sizeof(unsigned short)) + nodes[0] = i + nodes[s] = j + memcpy(nodes + 1, path1, size1 * sizeof(unsigned short)) - # Insert the new ring at the rank position - rings[rank] = ring - ring_hashes[rank] = hash - ring_sizes[rank] = size + # inverse second path + for k in range(size2 - 1, -1, -1): + s += 1 + nodes[s] = path2[k] + return nodes -cdef void add_ring_if_unique(unsigned int *path1, unsigned int *path2, - unsigned int **rings, unsigned int *ring_sizes, unsigned long long **ring_hashes, - size_t rank, size_t size, size_t size1, size_t size2, size_t hash_size, size_t n_rings): - cdef unsigned int *ring - cdef unsigned long long * hash = build_hash(path1, path2, size1, size2, hash_size) +cdef void push_ring(ring_t ring, rings_t *rings, unsigned short rank): + cdef size_t i - if check_ring_existence(hash, ring_hashes, ring_sizes, size, hash_size, n_rings) == 1: - ring = build_ring(path1, path2, size1, size2) - push_ring(rings, ring_hashes, ring_sizes, ring, hash, size, rank, n_rings) + if rings.n_allocated == rings.n_rings: + # drop last ring + free_ring(&rings.rings[rings.n_rings - 1]) else: - PyMem_Free(hash) + rings.n_allocated += 1 + + for i in range(rings.n_allocated - 1, rank, -1): + rings.rings[i] = rings.rings[i - 1] + + # insert the new ring at the rank position + rings.rings[rank] = ring cdef void build_pid(dist_matrix_t *matrix): @@ -296,7 +321,7 @@ cdef void build_pid(dist_matrix_t *matrix): if d == paths_ij.distance - 1 and has_not_overlap(paths_ik, paths_kj, paths_ij, rk, 0): move_paths(paths_ij) else: # drop old paths - clear_pids(paths_ij) + free_pids(paths_ij) paths_ij.distance = d append_pid0(paths_ij, paths_ik, paths_kj, rk) @@ -308,63 +333,72 @@ cdef void build_pid(dist_matrix_t *matrix): append_pid1(paths_ij, paths_ik, paths_kj, rk) -cdef void find_rings(unsigned int **rings, unsigned int *ring_sizes, - unsigned int **pid0, unsigned int **pid1, unsigned int *dist, - size_t n_nodes, size_t max_paths, size_t hash_size, size_t n_rings): - cdef size_t i, j, k, si, ij, size, rank - cdef unsigned int d, d1 - cdef unsigned int *path1 - cdef unsigned int *path2 - cdef unsigned long long **ring_hashes = PyMem_Malloc(n_rings * sizeof(unsigned long long *)) - - memset(ring_hashes, 0, n_rings * sizeof(unsigned long long *)) - memset(ring_sizes, 255, n_rings * sizeof(unsigned int)) - memset(rings, 0, n_rings * sizeof(unsigned int *)) +cdef void find_rings(dist_matrix_t *matrix, rings_t *rings): + cdef size_t i, j, k, si + cdef unsigned short rank + cdef ring_t ring + cdef paths_t *path - for i in range(n_nodes): - si = i * n_nodes - for j in range(n_nodes): + for i in range(matrix.n_nodes): + si = i * matrix.n_nodes + for j in range(matrix.n_nodes): if i == j: continue - ij = si + j - d = dist[ij] - if d == UINT_MAX: continue # different components - - ij *= max_paths - path2 = pid0[ij + 1] - if path2 == NULL: # is odd? - path2 = pid1[ij] - if path2 == NULL: continue # not ring - size = 2 * d + 1 - rank = get_rank(ring_sizes, size, n_rings) - if rank == UINT_MAX: continue - - d1 = d + 1 - path1 = pid0[ij] - add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d1, hash_size, n_rings) - - for k in range(ij + 1, ij + max_paths): - path2 = pid1[k] - if path2 == NULL: break - add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d1, hash_size, n_rings) + path = &matrix.data[si + j] + if path.distance == USHRT_MAX: continue # different components + elif path.distance == 1: # triangles + if not path.num_pid1: continue + rank = get_rank(rings, 3) + if rank == USHRT_MAX: continue # ring is not in the smallest set + ring.n_nodes = 3 + for k in range(path.num_pid1): + ring.hash = build_hash(NULL, path.pid1[k], matrix.mapping[i], matrix.mapping[j], + 0, 1, rings.hash_size) + if rank == 0 or is_unique(&ring, rings): + ring.nodes = PyMem_Malloc(3 * sizeof(unsigned short)) + ring.nodes[0] = matrix.mapping[i] + ring.nodes[1] = matrix.mapping[j] + ring.nodes[2] = path.pid1[k][0] + push_ring(ring, rings, rank) + print('$', i, j, rank, ring_to_tuple(rings.rings[rank].nodes, rings.rings[rank].n_nodes)) + else: + PyMem_Free(ring.hash) + elif path.num_pid0 == 1: # is odd? + if not path.num_pid1: continue + ring.n_nodes = 2 * path.distance + 1 + rank = get_rank(rings, ring.n_nodes) + if rank == USHRT_MAX: continue # ring is not in the smallest set + for k in range(path.num_pid1): + ring.hash = build_hash(path.pid0[0], path.pid1[k], matrix.mapping[i], matrix.mapping[j], + path.distance - 1, path.distance, rings.hash_size) + if rank == 0 or is_unique(&ring, rings): + print('%', i, j, rank) + ring.nodes = build_ring(path.pid0[0], path.pid1[k], matrix.mapping[i], matrix.mapping[j], + path.distance - 1, path.distance) + push_ring(ring, rings, rank) + else: + PyMem_Free(ring.hash) else: # is even - size = 2 * d - rank = get_rank(ring_sizes, size, n_rings) - if rank == UINT_MAX: continue # is not smaller than we have already - - path1 = pid0[ij] - add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d, hash_size, n_rings) - - for k in range(ij + 2, ij + max_paths): - path2 = pid0[k] - if path2 == NULL: break - add_ring_if_unique(path1, path2, rings, ring_sizes, ring_hashes, rank, size, d, d, hash_size, n_rings) - free_hashes(ring_hashes, n_rings) + ring.n_nodes = 2 * path.distance + rank = get_rank(rings, ring.n_nodes) + if rank == USHRT_MAX: continue + for k in range(1, path.num_pid0): + ring.hash = build_hash(path.pid0[k - 1], path.pid0[k], matrix.mapping[i], matrix.mapping[j], + path.distance - 1, path.distance - 1, rings.hash_size) + if rank == 0 or is_unique(&ring, rings): + ring.nodes = build_ring(path.pid0[k - 1], path.pid0[k], matrix.mapping[i], matrix.mapping[j], + path.distance - 1, path.distance - 1) + push_ring(ring, rings, rank) + print('@', i, j, rank, ring_to_tuple(rings.rings[rank].nodes, rings.rings[rank].n_nodes)) + else: + PyMem_Free(ring.hash) def sssr(dict graph, size_t n_rings): cdef size_t si - cdef unsigned short i, n, m, n_nodes = len(graph) + cdef unsigned short i, n, m, n_max = 0, n_nodes = len(graph) cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb + cdef ring_t ring + cdef rings_t *rings cdef object mb cdef list output = [] @@ -373,32 +407,36 @@ def sssr(dict graph, size_t n_rings): for i, n in enumerate(graph): matrix.mapping[i] = n reverse_mapping[n] = i + if n > n_max: n_max = n for n, mb in graph.items(): si = n_nodes * reverse_mapping[n] for m in mb: matrix.data[si + reverse_mapping[m]].distance = 1 + rings = alloc_rings(n_rings, (n_max + ULL_BITS - 1) // ULL_BITS) + + # run PID matrix calculation build_pid(matrix) - # + # DEBUG - cdef paths_t p - for i in range(n_nodes): - for j in range(n_nodes): - if i == j: continue - p = matrix.data[i * n_nodes + j] - if p.distance == 1: - print('!', matrix.mapping[i], matrix.mapping[j]) - for k in range(p.num_pid0): - print('?', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid0[k], p.distance - 1)) - for k in range(p.num_pid1): - print('$', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid1[k], p.distance)) - - # - # find_rings(rings, ring_sizes, pid0, pid1, dist, n_nodes, max_paths, hash_size, n_rings) - # + # cdef paths_t p + # for i in range(n_nodes): + # for j in range(n_nodes): + # if i == j: continue + # p = matrix.data[i * n_nodes + j] + # if p.distance == 1: + # print('!', matrix.mapping[i], matrix.mapping[j]) + # for k in range(p.num_pid0): + # print('?', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid0[k], p.distance - 1)) + # for k in range(p.num_pid1): + # print('$', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid1[k], p.distance)) + + find_rings(matrix, rings) # for i in range(n_rings): - # output.append(convert_array_to_tuple(rings[i], node_mapping, ring_sizes[i])) - # + # ring = rings.rings[i] + # output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) + free_dist_matrix(matrix) + free_rings(rings) return output From f776c39806363fa4192ee0d5d7c21229217acf9f Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Mon, 18 Aug 2025 00:07:49 +0200 Subject: [PATCH 09/16] find_ring refactored. todo: implement unique ring identification and fix push --- chython/algorithms/_rings.pyx | 117 +++++++++++++++------------------- 1 file changed, 50 insertions(+), 67 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index cadfc678..5591b056 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -232,36 +232,8 @@ cdef unsigned long long *build_hash(unsigned short *path1, unsigned short *path2 return hash -cdef size_t get_rank(rings_t *rings, size_t size): - cdef size_t i - for i in range(rings.n_rings): - if size < rings.rings[i].n_nodes: - return i - return USHRT_MAX - - -cdef int is_unique(ring_t *ring, rings_t *rings): - cdef size_t i, j - cdef int hash_match - cdef ring_t *other - - for i in range(rings.n_rings): - other = &rings.rings[i] - if ring.n_nodes == other.n_nodes: - hash_match = 1 - for j in range(rings.hash_size): - if ring.hash[j] ^ other.hash[j] != 0: - hash_match = 0 - break - if hash_match: - return 0 - elif ring.n_nodes < other.n_nodes: - return 1 - return 0 - - cdef unsigned short *build_ring(unsigned short *path1, unsigned short *path2, - unsigned short i, unsigned short j, size_t size1, size_t size2): + unsigned short i, unsigned short j, size_t size1, size_t size2): cdef size_t k, s = size1 + 1 cdef unsigned short *nodes @@ -277,7 +249,7 @@ cdef unsigned short *build_ring(unsigned short *path1, unsigned short *path2, return nodes -cdef void push_ring(ring_t ring, rings_t *rings, unsigned short rank): +cdef void push_ring(ring_t ring, rings_t *rings): cdef size_t i if rings.n_allocated == rings.n_rings: @@ -293,6 +265,26 @@ cdef void push_ring(ring_t ring, rings_t *rings, unsigned short rank): rings.rings[rank] = ring +cdef int is_unique(ring_t *ring, rings_t *rings): + cdef size_t i, j + cdef int hash_match + cdef ring_t *other + + for i in range(rings.n_rings): + other = &rings.rings[i] + if ring.n_nodes == other.n_nodes: + hash_match = 1 + for j in range(rings.hash_size): + if ring.hash[j] ^ other.hash[j] != 0: + hash_match = 0 + break + if hash_match: + return 0 + elif ring.n_nodes < other.n_nodes: + return 1 + return 0 + + cdef void build_pid(dist_matrix_t *matrix): cdef size_t i, j, k, sk, si cdef unsigned short rk @@ -335,60 +327,51 @@ cdef void build_pid(dist_matrix_t *matrix): cdef void find_rings(dist_matrix_t *matrix, rings_t *rings): cdef size_t i, j, k, si - cdef unsigned short rank + cdef unsigned short ri, rj, d cdef ring_t ring cdef paths_t *path for i in range(matrix.n_nodes): si = i * matrix.n_nodes + ri = matrix.mapping[i] for j in range(matrix.n_nodes): if i == j: continue path = &matrix.data[si + j] - if path.distance == USHRT_MAX: continue # different components - elif path.distance == 1: # triangles + d = path.distance + if d == USHRT_MAX: continue # different components + rj = matrix.mapping[j] + + if d == 1: # triangles if not path.num_pid1: continue - rank = get_rank(rings, 3) - if rank == USHRT_MAX: continue # ring is not in the smallest set ring.n_nodes = 3 for k in range(path.num_pid1): - ring.hash = build_hash(NULL, path.pid1[k], matrix.mapping[i], matrix.mapping[j], - 0, 1, rings.hash_size) - if rank == 0 or is_unique(&ring, rings): - ring.nodes = PyMem_Malloc(3 * sizeof(unsigned short)) - ring.nodes[0] = matrix.mapping[i] - ring.nodes[1] = matrix.mapping[j] - ring.nodes[2] = path.pid1[k][0] - push_ring(ring, rings, rank) - print('$', i, j, rank, ring_to_tuple(rings.rings[rank].nodes, rings.rings[rank].n_nodes)) + ring.hash = build_hash(NULL, path.pid1[k], ri, rj, 0, 1, rings.hash_size) + + if is_unique(&ring, rings): + ring.nodes = build_ring(path.pid1[k], NULL, ri, rj, 1, 0) + push_ring(ring, rings) else: PyMem_Free(ring.hash) elif path.num_pid0 == 1: # is odd? if not path.num_pid1: continue - ring.n_nodes = 2 * path.distance + 1 - rank = get_rank(rings, ring.n_nodes) - if rank == USHRT_MAX: continue # ring is not in the smallest set + ring.n_nodes = 2 * d + 1 for k in range(path.num_pid1): - ring.hash = build_hash(path.pid0[0], path.pid1[k], matrix.mapping[i], matrix.mapping[j], - path.distance - 1, path.distance, rings.hash_size) - if rank == 0 or is_unique(&ring, rings): - print('%', i, j, rank) - ring.nodes = build_ring(path.pid0[0], path.pid1[k], matrix.mapping[i], matrix.mapping[j], - path.distance - 1, path.distance) - push_ring(ring, rings, rank) + ring.hash = build_hash(path.pid0[0], path.pid1[k], ri, rj, d - 1, d, rings.hash_size) + + if is_unique(&ring, rings): + ring.nodes = build_ring(path.pid0[0], path.pid1[k], ri, rj, d - 1, d) + push_ring(ring, rings) else: PyMem_Free(ring.hash) else: # is even - ring.n_nodes = 2 * path.distance - rank = get_rank(rings, ring.n_nodes) - if rank == USHRT_MAX: continue + ring.n_nodes = 2 * d + d -= 1 for k in range(1, path.num_pid0): - ring.hash = build_hash(path.pid0[k - 1], path.pid0[k], matrix.mapping[i], matrix.mapping[j], - path.distance - 1, path.distance - 1, rings.hash_size) - if rank == 0 or is_unique(&ring, rings): - ring.nodes = build_ring(path.pid0[k - 1], path.pid0[k], matrix.mapping[i], matrix.mapping[j], - path.distance - 1, path.distance - 1) - push_ring(ring, rings, rank) - print('@', i, j, rank, ring_to_tuple(rings.rings[rank].nodes, rings.rings[rank].n_nodes)) + ring.hash = build_hash(path.pid0[k - 1], path.pid0[k], ri, rj, d, d, rings.hash_size) + + if is_unique(&ring, rings): + ring.nodes = build_ring(path.pid0[k - 1], path.pid0[k], ri, rj, d, d) + push_ring(ring, rings) else: PyMem_Free(ring.hash) @@ -433,9 +416,9 @@ def sssr(dict graph, size_t n_rings): # print('$', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid1[k], p.distance)) find_rings(matrix, rings) - # for i in range(n_rings): - # ring = rings.rings[i] - # output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) + for i in range(n_rings): + ring = rings.rings[i] + output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) free_dist_matrix(matrix) free_rings(rings) From 0e969ae816878a10425d5797c89164335b0e8f80 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Fri, 22 Aug 2025 09:32:28 +0200 Subject: [PATCH 10/16] saved --- chython/algorithms/_rings.pyx | 85 ++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 5591b056..8327b025 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -92,6 +92,16 @@ cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): return rings +cdef void realloc_rings(rings_t *rings, size_t n_rings): + cdef size_t i, old_n_rings = rings.n_rings + rings.n_rings = n_rings + rings.rings = PyMem_Realloc(rings.rings, n_rings * sizeof(ring_t)) + memset(rings.rings + old_n_rings, 0, (n_rings - old_n_rings) * sizeof(ring_t)) + + for i in range(old_n_rings, n_rings): + rings.rings[i].n_nodes = USHRT_MAX + + cdef void free_pid0(paths_t *paths): cdef size_t i if paths.num_pid0 == 0: return @@ -249,40 +259,43 @@ cdef unsigned short *build_ring(unsigned short *path1, unsigned short *path2, return nodes -cdef void push_ring(ring_t ring, rings_t *rings): +cdef void push_ring(ring_t ring, rings_t *rings, unsigned short rank): cdef size_t i + cdef unsigned short ext - if rings.n_allocated == rings.n_rings: - # drop last ring - free_ring(&rings.rings[rings.n_rings - 1]) - else: - rings.n_allocated += 1 + print('@', ring_to_tuple(ring.nodes, ring.n_nodes)) - for i in range(rings.n_allocated - 1, rank, -1): + if rings.n_allocated == rings.n_rings - 1: # almost all slots are busy + ext = rings.n_rings + if ext > 1000: ext = 1000 # no more than 1k rings to extend + realloc_rings(rings, rings.n_rings + ext) + + for i in range(rings.n_allocated, rank, -1): rings.rings[i] = rings.rings[i - 1] # insert the new ring at the rank position rings.rings[rank] = ring + rings.n_allocated += 1 -cdef int is_unique(ring_t *ring, rings_t *rings): +cdef unsigned short get_rank(ring_t *ring, rings_t *rings): cdef size_t i, j cdef int hash_match cdef ring_t *other - for i in range(rings.n_rings): + for i in range(rings.n_allocated + 1): other = &rings.rings[i] if ring.n_nodes == other.n_nodes: hash_match = 1 for j in range(rings.hash_size): - if ring.hash[j] ^ other.hash[j] != 0: + if ring.hash[j] != other.hash[j]: hash_match = 0 break if hash_match: - return 0 + return USHRT_MAX # duplicate found elif ring.n_nodes < other.n_nodes: - return 1 - return 0 + return i + return i cdef void build_pid(dist_matrix_t *matrix): @@ -325,11 +338,18 @@ cdef void build_pid(dist_matrix_t *matrix): append_pid1(paths_ij, paths_ik, paths_kj, rk) -cdef void find_rings(dist_matrix_t *matrix, rings_t *rings): - cdef size_t i, j, k, si - cdef unsigned short ri, rj, d +cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): + cdef size_t i, j, k, si, n_max = 0 + cdef unsigned short ri, rj, d, rank cdef ring_t ring cdef paths_t *path + cdef rings_t *rings + + for i in range(matrix.n_nodes): + ri = matrix.mapping[i] + if ri > n_max: n_max = ri + + rings = alloc_rings(n_rings, (n_max + ULL_BITS - 1) // ULL_BITS) for i in range(matrix.n_nodes): si = i * matrix.n_nodes @@ -347,9 +367,10 @@ cdef void find_rings(dist_matrix_t *matrix, rings_t *rings): for k in range(path.num_pid1): ring.hash = build_hash(NULL, path.pid1[k], ri, rj, 0, 1, rings.hash_size) - if is_unique(&ring, rings): + rank = get_rank(&ring, rings) + if rank != USHRT_MAX: ring.nodes = build_ring(path.pid1[k], NULL, ri, rj, 1, 0) - push_ring(ring, rings) + push_ring(ring, rings, rank) else: PyMem_Free(ring.hash) elif path.num_pid0 == 1: # is odd? @@ -358,9 +379,10 @@ cdef void find_rings(dist_matrix_t *matrix, rings_t *rings): for k in range(path.num_pid1): ring.hash = build_hash(path.pid0[0], path.pid1[k], ri, rj, d - 1, d, rings.hash_size) - if is_unique(&ring, rings): + rank = get_rank(&ring, rings) + if rank != USHRT_MAX: ring.nodes = build_ring(path.pid0[0], path.pid1[k], ri, rj, d - 1, d) - push_ring(ring, rings) + push_ring(ring, rings, rank) else: PyMem_Free(ring.hash) else: # is even @@ -369,19 +391,19 @@ cdef void find_rings(dist_matrix_t *matrix, rings_t *rings): for k in range(1, path.num_pid0): ring.hash = build_hash(path.pid0[k - 1], path.pid0[k], ri, rj, d, d, rings.hash_size) - if is_unique(&ring, rings): + rank = get_rank(&ring, rings) + if rank != USHRT_MAX: ring.nodes = build_ring(path.pid0[k - 1], path.pid0[k], ri, rj, d, d) - push_ring(ring, rings) + push_ring(ring, rings, rank) else: PyMem_Free(ring.hash) + return rings def sssr(dict graph, size_t n_rings): cdef size_t si - cdef unsigned short i, n, m, n_max = 0, n_nodes = len(graph) + cdef unsigned short i, n, m, n_nodes = len(graph) cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb - cdef ring_t ring - cdef rings_t *rings cdef object mb cdef list output = [] @@ -390,15 +412,12 @@ def sssr(dict graph, size_t n_rings): for i, n in enumerate(graph): matrix.mapping[i] = n reverse_mapping[n] = i - if n > n_max: n_max = n for n, mb in graph.items(): si = n_nodes * reverse_mapping[n] for m in mb: matrix.data[si + reverse_mapping[m]].distance = 1 - rings = alloc_rings(n_rings, (n_max + ULL_BITS - 1) // ULL_BITS) - # run PID matrix calculation build_pid(matrix) @@ -415,11 +434,13 @@ def sssr(dict graph, size_t n_rings): # for k in range(p.num_pid1): # print('$', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid1[k], p.distance)) - find_rings(matrix, rings) - for i in range(n_rings): - ring = rings.rings[i] + # run CSET calculation + cset = build_cset(matrix, n_rings) + + for i in range(cset.n_rings): + ring = cset.rings[i] output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) free_dist_matrix(matrix) - free_rings(rings) + free_rings(cset) return output From 02aa314e210266f8d139c3b142105550261480ec Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 11:18:16 +0200 Subject: [PATCH 11/16] pid calculation and cset calculation is done --- chython/algorithms/_rings.pyx | 74 ++++++++++++++++------------------- 1 file changed, 33 insertions(+), 41 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 8327b025..84e8d296 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -24,14 +24,11 @@ # along with this program; if not, see . # from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc +from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from libc.limits cimport USHRT_MAX from libc.string cimport memset, memcpy -cdef enum: - ULL_BITS = sizeof(unsigned long long) * 8 - - cdef struct paths_t: # store PID units. paths store only non-terminal nodes # in the case of distances equals 1 - don't store any paths @@ -145,7 +142,7 @@ cdef void free_ring(ring_t *ring): cdef void free_rings(rings_t *rings): cdef size_t i - for i in range(rings.n_rings): + for i in range(rings.n_allocated): free_ring(&rings.rings[i]) PyMem_Free(rings.rings) PyMem_Free(rings) @@ -192,7 +189,11 @@ cdef void append_pid1(paths_t *paths_ij, paths_t *paths_ik, paths_t *paths_kj, u cdef tuple ring_to_tuple(unsigned short *ring, size_t size): cdef size_t i - return tuple(ring[i] for i in range(size)) + cdef tuple result = PyTuple_New(size) + + for i in range(size): + PyTuple_SET_ITEM(result, i, ring[i]) + return result cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, unsigned short k, int test_pid1): @@ -230,15 +231,15 @@ cdef unsigned long long *build_hash(unsigned short *path1, unsigned short *path2 cdef unsigned long long *hash = PyMem_Malloc(hash_size * sizeof(unsigned long long)) memset(hash, 0, hash_size * sizeof(unsigned long long)) - hash[i // ULL_BITS] |= (1 << (i % ULL_BITS)) # add linker node - hash[j // ULL_BITS] |= (1 << (j % ULL_BITS)) + hash[i // 64] |= (1 << (i % 64)) # add linker node + hash[j // 64] |= (1 << (j % 64)) for k in range(size1): node = path1[k] - hash[node // ULL_BITS] |= (1 << (node % ULL_BITS)) + hash[node // 64] |= (1 << (node % 64)) for k in range(size2): node = path2[k] - hash[node // ULL_BITS] |= (1 << (node % ULL_BITS)) + hash[node // 64] |= (1 << (node % 64)) return hash @@ -259,14 +260,12 @@ cdef unsigned short *build_ring(unsigned short *path1, unsigned short *path2, return nodes -cdef void push_ring(ring_t ring, rings_t *rings, unsigned short rank): +cdef void push_ring(ring_t *ring, rings_t *rings, unsigned short rank): cdef size_t i cdef unsigned short ext - print('@', ring_to_tuple(ring.nodes, ring.n_nodes)) - if rings.n_allocated == rings.n_rings - 1: # almost all slots are busy - ext = rings.n_rings + ext = rings.n_rings # double rings storage if ext > 1000: ext = 1000 # no more than 1k rings to extend realloc_rings(rings, rings.n_rings + ext) @@ -274,7 +273,7 @@ cdef void push_ring(ring_t ring, rings_t *rings, unsigned short rank): rings.rings[i] = rings.rings[i - 1] # insert the new ring at the rank position - rings.rings[rank] = ring + rings.rings[rank] = ring[0] # copy dereferenced struct memory rings.n_allocated += 1 @@ -283,7 +282,7 @@ cdef unsigned short get_rank(ring_t *ring, rings_t *rings): cdef int hash_match cdef ring_t *other - for i in range(rings.n_allocated + 1): + for i in range( rings.n_allocated + 1): other = &rings.rings[i] if ring.n_nodes == other.n_nodes: hash_match = 1 @@ -293,9 +292,8 @@ cdef unsigned short get_rank(ring_t *ring, rings_t *rings): break if hash_match: return USHRT_MAX # duplicate found - elif ring.n_nodes < other.n_nodes: + elif ring.n_nodes < other.n_nodes: # always true for n_allocated+1 return i - return i cdef void build_pid(dist_matrix_t *matrix): @@ -349,7 +347,7 @@ cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): ri = matrix.mapping[i] if ri > n_max: n_max = ri - rings = alloc_rings(n_rings, (n_max + ULL_BITS - 1) // ULL_BITS) + rings = alloc_rings(n_rings, (n_max + 64 - 1) // 64) for i in range(matrix.n_nodes): si = i * matrix.n_nodes @@ -357,7 +355,7 @@ cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): for j in range(matrix.n_nodes): if i == j: continue path = &matrix.data[si + j] - d = path.distance + d = path.distance # make shortcut if d == USHRT_MAX: continue # different components rj = matrix.mapping[j] @@ -370,7 +368,7 @@ cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): rank = get_rank(&ring, rings) if rank != USHRT_MAX: ring.nodes = build_ring(path.pid1[k], NULL, ri, rj, 1, 0) - push_ring(ring, rings, rank) + push_ring(&ring, rings, rank) else: PyMem_Free(ring.hash) elif path.num_pid0 == 1: # is odd? @@ -382,7 +380,7 @@ cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): rank = get_rank(&ring, rings) if rank != USHRT_MAX: ring.nodes = build_ring(path.pid0[0], path.pid1[k], ri, rj, d - 1, d) - push_ring(ring, rings, rank) + push_ring(&ring, rings, rank) else: PyMem_Free(ring.hash) else: # is even @@ -394,16 +392,22 @@ cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): rank = get_rank(&ring, rings) if rank != USHRT_MAX: ring.nodes = build_ring(path.pid0[k - 1], path.pid0[k], ri, rj, d, d) - push_ring(ring, rings, rank) + push_ring(&ring, rings, rank) else: PyMem_Free(ring.hash) return rings +cdef void filter_rings(rings_t *rings): + ... + + def sssr(dict graph, size_t n_rings): cdef size_t si cdef unsigned short i, n, m, n_nodes = len(graph) cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb + cdef ring_t ring + cdef rings_t *rings cdef object mb cdef list output = [] @@ -420,27 +424,15 @@ def sssr(dict graph, size_t n_rings): # run PID matrix calculation build_pid(matrix) - - # DEBUG - # cdef paths_t p - # for i in range(n_nodes): - # for j in range(n_nodes): - # if i == j: continue - # p = matrix.data[i * n_nodes + j] - # if p.distance == 1: - # print('!', matrix.mapping[i], matrix.mapping[j]) - # for k in range(p.num_pid0): - # print('?', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid0[k], p.distance - 1)) - # for k in range(p.num_pid1): - # print('$', matrix.mapping[i], matrix.mapping[j], ring_to_tuple(p.pid1[k], p.distance)) - # run CSET calculation - cset = build_cset(matrix, n_rings) + rings = build_cset(matrix, n_rings) + # filter out condensed rings + filter_rings(rings) - for i in range(cset.n_rings): - ring = cset.rings[i] + for i in range(rings.n_allocated): + ring = rings.rings[i] output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) free_dist_matrix(matrix) - free_rings(cset) + free_rings(rings) return output From 0b1ea5b09a0f0a93115073134a90babea05ad0b7 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 12:48:31 +0200 Subject: [PATCH 12/16] filtering implemented --- chython/algorithms/_rings.pyx | 48 ++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 84e8d296..8f0c59bf 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -90,12 +90,11 @@ cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): cdef void realloc_rings(rings_t *rings, size_t n_rings): - cdef size_t i, old_n_rings = rings.n_rings - rings.n_rings = n_rings + cdef size_t i rings.rings = PyMem_Realloc(rings.rings, n_rings * sizeof(ring_t)) - memset(rings.rings + old_n_rings, 0, (n_rings - old_n_rings) * sizeof(ring_t)) + memset(rings.rings + rings.n_rings, 0, (n_rings - rings.n_rings) * sizeof(ring_t)) - for i in range(old_n_rings, n_rings): + for i in range(rings.n_rings, n_rings): rings.rings[i].n_nodes = USHRT_MAX @@ -399,7 +398,42 @@ cdef rings_t *build_cset(dist_matrix_t *matrix, size_t n_rings): cdef void filter_rings(rings_t *rings): - ... + cdef size_t i, j, k, pc = 0 + cdef ring_t *ring + cdef int is_unique + cdef unsigned long long *hash + cdef unsigned short *poplist + + if rings.n_allocated == rings.n_rings: return + + hash = PyMem_Malloc(rings.hash_size * sizeof(unsigned long long)) + poplist = PyMem_Malloc(rings.n_allocated * sizeof(unsigned short)) + memcpy(hash, rings.rings[0].hash, rings.hash_size * sizeof(unsigned long long)) + + for i in range(1, rings.n_allocated): + is_unique = 0 + ring = &rings.rings[i] + for j in range(rings.hash_size): + if ring.hash[j] & (~hash[j]): + is_unique = 1 + break + if is_unique: # extend global hash + for j in range(rings.hash_size): + hash[j] |= ring.hash[j] + else: + poplist[pc] = i + pc += 1 + + for i in range(rings.n_allocated - 1, rings.n_rings - 1, -1): + pc -= 1 + k = poplist[pc] + + free_ring(&rings.rings[k]) # drop condensed ring + for j in range(k, i): + rings.rings[j] = rings.rings[j + 1] + + rings.n_allocated = rings.n_rings + PyMem_Free(poplist) def sssr(dict graph, size_t n_rings): @@ -408,11 +442,13 @@ def sssr(dict graph, size_t n_rings): cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb cdef ring_t ring cdef rings_t *rings + cdef dist_matrix_t *matrix cdef object mb cdef list output = [] - cdef dist_matrix_t *matrix = alloc_dist_matrix(n_nodes) + if not n_rings: return output + matrix = alloc_dist_matrix(n_nodes) for i, n in enumerate(graph): matrix.mapping[i] = n reverse_mapping[n] = i From edc8446fc51bff89f1840eba4729a2930310b35a Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 13:36:16 +0200 Subject: [PATCH 13/16] fixed mem management --- chython/algorithms/_rings.pyx | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 8f0c59bf..c6a6e8b8 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -54,6 +54,7 @@ cdef struct ring_t: cdef struct rings_t: unsigned short n_rings unsigned short n_allocated + unsigned short n_reserved unsigned short hash_size ring_t *rings @@ -80,6 +81,7 @@ cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): rings.n_rings = n_rings rings.n_allocated = 0 + rings.n_reserved = n_rings rings.hash_size = hash_size rings.rings = PyMem_Malloc(n_rings * sizeof(ring_t)) memset(rings.rings, 0, n_rings * sizeof(ring_t)) @@ -92,10 +94,11 @@ cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): cdef void realloc_rings(rings_t *rings, size_t n_rings): cdef size_t i rings.rings = PyMem_Realloc(rings.rings, n_rings * sizeof(ring_t)) - memset(rings.rings + rings.n_rings, 0, (n_rings - rings.n_rings) * sizeof(ring_t)) + memset(rings.rings + rings.n_reserved, 0, (n_rings - rings.n_reserved) * sizeof(ring_t)) - for i in range(rings.n_rings, n_rings): + for i in range(rings.n_reserved, n_rings): rings.rings[i].n_nodes = USHRT_MAX + rings.n_reserved = n_rings cdef void free_pid0(paths_t *paths): @@ -263,8 +266,8 @@ cdef void push_ring(ring_t *ring, rings_t *rings, unsigned short rank): cdef size_t i cdef unsigned short ext - if rings.n_allocated == rings.n_rings - 1: # almost all slots are busy - ext = rings.n_rings # double rings storage + if rings.n_allocated == rings.n_reserved - 1: # almost all slots are used + ext = rings.n_reserved # double rings storage if ext > 1000: ext = 1000 # no more than 1k rings to extend realloc_rings(rings, rings.n_rings + ext) @@ -434,6 +437,7 @@ cdef void filter_rings(rings_t *rings): rings.n_allocated = rings.n_rings PyMem_Free(poplist) + PyMem_Free(hash) def sssr(dict graph, size_t n_rings): @@ -462,13 +466,13 @@ def sssr(dict graph, size_t n_rings): build_pid(matrix) # run CSET calculation rings = build_cset(matrix, n_rings) + free_dist_matrix(matrix) # filter out condensed rings filter_rings(rings) - for i in range(rings.n_allocated): + for i in range(rings.n_rings): ring = rings.rings[i] output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) - free_dist_matrix(matrix) free_rings(rings) return output From 2c00598e7604e4172e06baf4ad6d514e8430298d Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 18:55:05 +0200 Subject: [PATCH 14/16] fixes --- chython/algorithms/_rings.pyx | 42 +++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index c6a6e8b8..5f6398d8 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -76,17 +76,17 @@ cdef dist_matrix_t *alloc_dist_matrix(unsigned short n_nodes): cdef rings_t *alloc_rings(unsigned short n_rings, size_t hash_size): - cdef size_t i + cdef size_t i, n1 = n_rings + 1 cdef rings_t *rings = PyMem_Malloc(sizeof(rings_t)) rings.n_rings = n_rings rings.n_allocated = 0 - rings.n_reserved = n_rings + rings.n_reserved = n1 rings.hash_size = hash_size - rings.rings = PyMem_Malloc(n_rings * sizeof(ring_t)) - memset(rings.rings, 0, n_rings * sizeof(ring_t)) + rings.rings = PyMem_Malloc(n1 * sizeof(ring_t)) + memset(rings.rings, 0, n1 * sizeof(ring_t)) - for i in range(n_rings): + for i in range(n1): rings.rings[i].n_nodes = USHRT_MAX return rings @@ -189,13 +189,13 @@ cdef void append_pid1(paths_t *paths_ij, paths_t *paths_ik, paths_t *paths_kj, u paths_ij.pid1[i] = concatenate_paths(paths_ik, paths_kj, k) -cdef tuple ring_to_tuple(unsigned short *ring, size_t size): +cdef tuple ring_to_tuple(ring_t *ring): cdef size_t i - cdef tuple result = PyTuple_New(size) + cdef list result = [] - for i in range(size): - PyTuple_SET_ITEM(result, i, ring[i]) - return result + for i in range(ring.n_nodes): + result.append(ring.nodes[i]) + return tuple(result) cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij, unsigned short k, int test_pid1): @@ -229,19 +229,19 @@ cdef int has_not_overlap(paths_t *paths_ik, paths_t *paths_kj, paths_t *paths_ij cdef unsigned long long *build_hash(unsigned short *path1, unsigned short *path2, unsigned short i, unsigned short j, size_t size1, size_t size2, size_t hash_size): cdef size_t k - cdef unsigned int node + cdef unsigned short n cdef unsigned long long *hash = PyMem_Malloc(hash_size * sizeof(unsigned long long)) memset(hash, 0, hash_size * sizeof(unsigned long long)) - hash[i // 64] |= (1 << (i % 64)) # add linker node - hash[j // 64] |= (1 << (j % 64)) + hash[i // 64] |= ( 1 << (i % 64)) # add linker node + hash[j // 64] |= ( 1 << (j % 64)) for k in range(size1): - node = path1[k] - hash[node // 64] |= (1 << (node % 64)) + n = path1[k] + hash[n // 64] |= ( 1 << (n % 64)) for k in range(size2): - node = path2[k] - hash[node // 64] |= (1 << (node % 64)) + n = path2[k] + hash[n // 64] |= ( 1 << (n % 64)) return hash @@ -269,7 +269,7 @@ cdef void push_ring(ring_t *ring, rings_t *rings, unsigned short rank): if rings.n_allocated == rings.n_reserved - 1: # almost all slots are used ext = rings.n_reserved # double rings storage if ext > 1000: ext = 1000 # no more than 1k rings to extend - realloc_rings(rings, rings.n_rings + ext) + realloc_rings(rings, rings.n_reserved + ext) for i in range(rings.n_allocated, rank, -1): rings.rings[i] = rings.rings[i - 1] @@ -444,7 +444,6 @@ def sssr(dict graph, size_t n_rings): cdef size_t si cdef unsigned short i, n, m, n_nodes = len(graph) cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb - cdef ring_t ring cdef rings_t *rings cdef dist_matrix_t *matrix cdef object mb @@ -452,8 +451,10 @@ def sssr(dict graph, size_t n_rings): if not n_rings: return output + if n_nodes > 65500: raise ValueError('Too many atoms') matrix = alloc_dist_matrix(n_nodes) for i, n in enumerate(graph): + if n > 65500: raise ValueError('Atom index too large') matrix.mapping[i] = n reverse_mapping[n] = i @@ -471,8 +472,7 @@ def sssr(dict graph, size_t n_rings): filter_rings(rings) for i in range(rings.n_rings): - ring = rings.rings[i] - output.append(ring_to_tuple(ring.nodes, ring.n_nodes)) + output.append(ring_to_tuple(&rings.rings[i])) free_rings(rings) return output From 715cdf80136688a5b38b122526c54ea37edc777a Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 19:56:04 +0200 Subject: [PATCH 15/16] integrated --- chython/algorithms/_rings.pyx | 3 +- chython/algorithms/aromatics/thiele.py | 7 +- chython/algorithms/rings.py | 375 +------------------------ pyproject.toml | 2 +- 4 files changed, 9 insertions(+), 378 deletions(-) diff --git a/chython/algorithms/_rings.pyx b/chython/algorithms/_rings.pyx index 5f6398d8..c86a2f28 100644 --- a/chython/algorithms/_rings.pyx +++ b/chython/algorithms/_rings.pyx @@ -24,7 +24,6 @@ # along with this program; if not, see . # from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc -from cpython.tuple cimport PyTuple_New, PyTuple_SET_ITEM from libc.limits cimport USHRT_MAX from libc.string cimport memset, memcpy @@ -440,7 +439,7 @@ cdef void filter_rings(rings_t *rings): PyMem_Free(hash) -def sssr(dict graph, size_t n_rings): +def sssr(object graph, size_t n_rings): cdef size_t si cdef unsigned short i, n, m, n_nodes = len(graph) cdef unsigned short [USHRT_MAX] reverse_mapping # 128kb diff --git a/chython/algorithms/aromatics/thiele.py b/chython/algorithms/aromatics/thiele.py index c6682247..3c42fc45 100644 --- a/chython/algorithms/aromatics/thiele.py +++ b/chython/algorithms/aromatics/thiele.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2021-2024 Ramil Nugmanov +# Copyright 2021-2025 Ramil Nugmanov # This file is part of chython. # # chython is free software; you can redistribute it and/or modify @@ -19,7 +19,8 @@ from collections import defaultdict from typing import TYPE_CHECKING from ._rules import freak_rules -from ..rings import _sssr, _connected_components +from .._rings import sssr +from ..rings import _connected_components if TYPE_CHECKING: @@ -186,7 +187,7 @@ def thiele(self: 'MoleculeContainer', *, fix_tautomers=True) -> bool: n_sssr = sum(len(x) for x in rings.values()) // 2 - len(rings) + len(_connected_components(rings)) if not n_sssr: return False - rings = _sssr(rings, n_sssr) # search rings again + rings = sssr(rings, n_sssr) # search rings again seen = set() for ring in rings: diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index f7dc58de..62722a7d 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -18,10 +18,8 @@ # from collections import defaultdict, deque from functools import cached_property -from itertools import combinations -from operator import itemgetter -from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING, Union -from ..exceptions import ImplementationError +from typing import Any, Dict, List, Set, Tuple, Union, TYPE_CHECKING +from ._rings import sssr if TYPE_CHECKING: @@ -45,7 +43,7 @@ def sssr(self) -> List[Tuple[int, ...]]: :return rings atoms numbers """ if self.rings_count: - return _sssr(self.not_special_connectivity, self.rings_count) + return sssr(self.not_special_connectivity, self.rings_count) return [] @cached_property @@ -148,17 +146,6 @@ def rings_graph(self: 'MoleculeContainer'): return bonds -def _sssr(bonds: Dict[int, Union[Set[int], Dict[int, Any]]], n_sssr: int) -> List[Tuple[int, ...]]: - """ - Smallest Set of Smallest Rings of any adjacency matrix. - Number of rings required. - """ - bonds = _skin_graph(bonds) - paths = _bfs(bonds) - pid1, pid2, dist = _make_pid(paths) - return _rings_filter(_c_set(pid1, pid2, dist), n_sssr) - - def _connected_components(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> List[Set[int]]: atoms = set(bonds) components = [] @@ -192,360 +179,4 @@ def _skin_graph(bonds: Dict[int, Union[Set[int], Dict[int, Any]]]) -> Dict[int, return bonds -def _bfs(bonds): - atoms = set(bonds) - terminated = [] - tail = atoms.pop() - next_stack = {x: [tail, x] for x in bonds[tail]} - - while True: - next_front = set() - found_odd = set() - stack, next_stack = next_stack, {} - for tail, path in stack.items(): - neighbors = bonds[tail] & atoms - next_front.add(tail) - - if len(neighbors) == 1: - n = neighbors.pop() - if n in found_odd: - if len(path) != 1: - terminated.append(tuple(path)) # save second ring closure - next_stack[n] = [n] # maybe we have another path? - else: - path.append(n) - if n in stack: # odd rings - found_odd.add(tail) - terminated.append(tuple(path)) # found ring closure. save path. - elif n in next_stack: # even rings - terminated.append(tuple(path)) - if len(next_stack[n]) != 1: # prevent bicycle case - terminated.append(tuple(next_stack[n])) - next_stack[n] = [n] - else: - next_stack[n] = path # grow must go on - elif neighbors: - if len(path) != 1: - terminated.append(tuple(path)) # save path. - for n in neighbors: - if n in found_odd: - if n in stack: - if n in next_stack: - del next_stack[n] - else: - next_stack[n] = [n] - else: - path = [tail, n] - if n in stack: # odd rings - found_odd.add(tail) - terminated.append(tuple(path)) - elif n in next_stack: # even rings - terminated.append(tuple(path)) - if len(next_stack[n]) != 1: # prevent bicycle case - terminated.append(tuple(next_stack[n])) - next_stack[n] = [n] - else: - next_stack[n] = path - - atoms.difference_update(next_front) - if not atoms: - break - elif not next_stack: - tail = atoms.pop() - next_stack = {x: [tail, x] for x in bonds[tail] & atoms} - return terminated - - -def _make_pid(paths: List[List[int]]): - pid1 = defaultdict(lambda: defaultdict(dict)) - pid2 = defaultdict(lambda: defaultdict(dict)) - distances = defaultdict(lambda: defaultdict(lambda: 1e9)) - chains = sorted(paths, key=len) - for c in chains: - di = len(c) - 1 - n, m = c[0], c[-1] - nn, mm = c[1], c[-2] - if n in distances and m in distances[n] and distances[n][m] != di: - pid2[n][m][(nn, mm)] = c - pid2[m][n][(mm, nn)] = c[::-1] - else: - pid1[n][m][(nn, mm)] = c - pid1[m][n][(mm, nn)] = c[::-1] - distances[n][m] = distances[m][n] = di - - for k in pid1: - new_distances = defaultdict(dict) - dk = distances[k] - ndk = new_distances[k] - for i in pid1: - if i == k: - continue - di = distances[i] - ndi = new_distances[i] - ndk[i] = ndi[k] = di[k] - for j in pid1: - if j == k or j == i: - continue - ij = di[j] - ikj = di[k] + dk[j] - if ij - ikj == 1: # A new shortest path == previous shortest path - 1 - pid2[i][j] = pid1[i][j] - pid1[i][j] = {(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())} - ndi[j] = ikj - elif ij > ikj: # A new shortest path - pid2[i][j] = {} - pid1[i][j] = {(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())} - ndi[j] = ikj - elif ij == ikj: # Another shortest path - pid1[i][j].update({(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())}) - ndi[j] = ij - elif ikj - ij == 1: # Shortest+1 path - pid2[i][j].update({(ni, mj): ip[:-1] + jp for ((ni, _), ip), ((_, mj), jp) in - zip(pid1[i][k].items(), pid1[k][j].items())}) - ndi[j] = ij - else: - ndi[j] = ij - distances = new_distances - return pid1, pid2, distances - - -def _c_set(pid1, pid2, pid1l): - c_set = [] - seen = set() - for i, p1i in pid1.items(): - seen.add(i) - di = pid1l[i] - p2i = pid2[i] - - for j, p1ij in p1i.items(): - if j in seen: - continue - p1ij = list(p1ij.values()) - p2ij = list(p2i[j].values()) - dij = di[j] * 2 - - if len(p1ij) == 1: # one shortest - if not p2ij: # need shortest + 1 path - continue - c_set.append((dij + 1, p1ij, p2ij)) - elif not p2ij: # one or more odd rings - c_set.append((dij, p1ij, None)) - else: # odd and even rings found (e.g. bicycle) - c_set.append((dij, p1ij, None)) - c_set.append((dij + 1, p1ij, p2ij)) - - for c_num, p1ij, p2ij in sorted(c_set, key=itemgetter(0)): - if c_num % 2: # odd rings - for c1 in p1ij: - for c2 in p2ij: - c = c1 + c2[-2:0:-1] - if len(c) == len(set(c)): - yield _canonic_ring(c) - else: - for c1, c2 in zip(p1ij, p1ij[1:]): - c = c1 + c2[-2:0:-1] - if len(c) == len(set(c)): - yield _canonic_ring(c) - - -def _canonic_ring(ring: Tuple[int, ...]) -> Tuple[int, ...]: - n = min(ring) - ndx = ring.index(n) - if ndx == 0: - if ring[-1] < ring[1]: - return n, *ring[:0:-1] - return ring - elif ndx == len(ring) - 1: - if ring[0] > ring[-2]: - return ring[::-1] - return n, *ring[:-1] - if ring[ndx + 1] > ring[ndx - 1]: - return *ring[ndx::-1], *ring[:ndx:-1] - return *ring[ndx:], *ring[:ndx] - - -def _ring_scissors(ring: Tuple[int, ...], n: int, m: int) -> Tuple[int, ...]: - ndx = ring.index(n) - mdx = ring.index(m) - if ndx == 0: - if mdx == 1: - return n, *ring[:0:-1] - return ring - elif ndx == len(ring) - 1: - if mdx == 0: - return ring[::-1] - return n, *ring[:-1] - if ndx < mdx: - return *ring[ndx::-1], *ring[:ndx:-1] - return *ring[ndx:], *ring[:ndx] - - -def _ring_adjacency(ring: Tuple[int, ...]) -> Dict[int, List[int]]: - adj = {ring[0]: [ring[-1]]} # ring adjacency matrix - for n, m in zip(ring, ring[1:]): - adj[n].append(m) - adj[m] = [n] - adj[m].append(ring[0]) - return adj - - -def _is_condensed_ring(c, sssr, seen_rings): - # create graph of connected neighbour rings - ck = seen_rings[c] - neighbors = {x: set() for x in sssr if len(seen_rings[x].keys() & ck.keys()) > 1} - if len(neighbors) > 1: - for (i, iv), (j, jv) in combinations(neighbors.items(), 2): - if len(seen_rings[i].keys() & seen_rings[j].keys()) > 1: - iv.add(j) - jv.add(i) - # check if hold rings is combination of existing. (123654) is combo of (1254) and (2365) - # - # 1--2--3 - # | | | - # 4--5--6 - # - # modified NX.dfs_labeled_edges - # https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.\ - # traversal.depth_first_search.dfs_labeled_edges.html - depth_limit = len(neighbors) - 1 - for start, nbrs in neighbors.items(): - if not nbrs: - continue - stack = [(start, seen_rings[start], depth_limit, iter(nbrs), {start})] - while stack: - parent, p_adj, depth_now, children, seen = stack[-1] - try: - child = next(children) - except StopIteration: - stack.pop() - else: - if child not in seen: - common = p_adj.keys() & seen_rings[child].keys() - if len(common) > 2: # only terminal common atoms required - term = {n for n in common if len(common.intersection(p_adj[n])) == 1} - if len(term) != 2: # skip multiple contacts - continue - common.difference_update(term) - n, m = term - mc = _canonic_ring( - (*_ring_scissors(tuple(x for x in parent if x not in common), n, m), - *_ring_scissors(tuple(x for x in child if x not in common), m, n)[1:-1])) - elif len(common) == 2: - n, m = common - mc = _canonic_ring((*_ring_scissors(parent, n, m), *_ring_scissors(child, m, n)[1:-1])) - else: # point connections - continue - if c == mc: # macrocycle found - return True - elif depth_now and 2 < len(mc) <= len(c) + 1: - stack.append((mc, _ring_adjacency(mc), depth_now - 1, iter(neighbors[child]), - {child} | seen)) - return False - - -def _get_unique_chord(ring: Tuple[int, ...], common: Set[int]) -> Optional[Tuple[int, ...]]: - lc = len(common) - if len(ring) == lc: - if common == set(ring): - return () - else: - if common == set(ring[:lc]): - return *ring[lc - 1:], ring[0] - for _ in range(len(ring) - 1): - ring = (*ring[1:], ring[0]) - if common == set(ring[:lc]): - return *ring[lc - 1:], ring[0] - - -def _connected_rings(rings, seen_rings): - rings = rings.copy() - out = [] - for i in range(len(rings)): - c = rings[i] - ck = seen_rings[c] - for j in range(i + 1, len(rings)): - r = rings[j] - rk = seen_rings[r] - common = rk.keys() & ck.keys() - if len(common) == 2: # one common bond - n, m = common - if m in ck[n] and m in rk[n]: # only common bond! - c = _canonic_ring((*_ring_scissors(c, n, m), *_ring_scissors(r, m, n)[1:-1])) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - elif len(common) > 2: - cc = _get_unique_chord(c, common) - if cc is None: # skip multitouched rings - continue - r = _get_unique_chord(r, common) - if r is None: - continue - if cc: - if r: - if r[0] == cc[0]: - r = r[::-1] - c = _canonic_ring((*cc, *r[1:-1])) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - else: - c = _canonic_ring(cc) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - elif r: - c = _canonic_ring(r) - ck = _ring_adjacency(c) - rings[j] = c - seen_rings[c] = ck - break - else: # isolated ring[s] found - out.append(c) - return out - - -def _rings_filter(rings, n_sssr): - c = next(rings) - if n_sssr == 1: - return [c] - - seen_rings = {c} - sssr_atoms = set(c) - sssr = [c] - hold = [] - for c in rings: - if c in seen_rings: - continue - seen_rings.add(c) - if sssr_atoms.issuperset(c): # potentially condensed ring - hold.append(c) - continue - sssr_atoms.update(c) - sssr.append(c) - if len(sssr) == n_sssr: - return sssr - - # now we have set of plug rings (cuban fullerene), besiege rings and condensed trash - seen_rings = {c: _ring_adjacency(c) for c in seen_rings} # prepare adjacency - condensed_rings = _connected_rings(sssr, seen_rings) # collection of contours of condensed rings - - for c in hold: - if c in condensed_rings or _is_condensed_ring(c, sssr, seen_rings): - continue - condensed_rings.insert(0, c) - condensed_rings = _connected_rings(condensed_rings, seen_rings) - sssr.append(c) - if len(sssr) == n_sssr: - return sorted(sssr, key=len) - - raise ImplementationError('SSSR count not reached') - - __all__ = ['Rings'] diff --git a/pyproject.toml b/pyproject.toml index cd333010..ad15a8ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = 'chython' -version = '2.6' +version = '2.7' description = 'Library for processing molecules and reactions in python way' authors = ['Ramil Nugmanov '] license = 'LGPLv3' From 95e0abddbde44d5794a97c544392b04819f812b1 Mon Sep 17 00:00:00 2001 From: Ramil Nugmanov Date: Sat, 23 Aug 2025 20:16:42 +0200 Subject: [PATCH 16/16] optimization --- chython/algorithms/rings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chython/algorithms/rings.py b/chython/algorithms/rings.py index 62722a7d..8f5f7ccd 100644 --- a/chython/algorithms/rings.py +++ b/chython/algorithms/rings.py @@ -43,7 +43,7 @@ def sssr(self) -> List[Tuple[int, ...]]: :return rings atoms numbers """ if self.rings_count: - return sssr(self.not_special_connectivity, self.rings_count) + return sssr(_skin_graph(self.not_special_connectivity), self.rings_count) return [] @cached_property