diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index e61d27b..0000000 --- a/.coveragerc +++ /dev/null @@ -1,10 +0,0 @@ -[run] -source = smmap - -; to make nosetests happy -[report] -omit = - */yaml* - */tests/* - */python?.?/* - */site-packages/nose/* \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 203f3c8..0000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,6 +0,0 @@ -version: 2 -updates: -- package-ecosystem: "github-actions" - directory: "/" - schedule: - interval: "weekly" diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml deleted file mode 100644 index ab0a8ef..0000000 --- a/.github/workflows/pythonpackage.yml +++ /dev/null @@ -1,46 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: Python package - -on: [push, pull_request, workflow_dispatch] - -jobs: - build: - strategy: - matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] - include: - - experimental: false - - os: ubuntu-latest - - python-version: "3.7" - os: ubuntu-22.04 - - runs-on: ${{ matrix.os }} - - continue-on-error: ${{ matrix.experimental }} - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - allow-prereleases: ${{ matrix.experimental }} - - name: Install project - run: | - python -m pip install --upgrade pip - pip install . - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test - run: | - pip install pytest - ulimit -n 48 - ulimit -n - pytest -v diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 85cc74f..0000000 --- a/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -*.pyc -build/ -.coverage -coverage -cover/ -dist/ -MANIFEST -.tox -*.egg-info -.noseids -*.sublime-workspace -/env/ diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 710010f..0000000 --- a/LICENSE +++ /dev/null @@ -1,30 +0,0 @@ -Copyright (C) 2010, 2011 Sebastian Thiel and contributors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -* Neither the name of the async project nor the names of -its contributors may be used to endorse or promote products derived -from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 399a207..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -# Include the license file -include LICENSE diff --git a/Makefile b/Makefile deleted file mode 100644 index 20436bb..0000000 --- a/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -.PHONY: all clean release force_release - -all: - @grep -Ee '^[a-z].*:' Makefile | cut -d: -f1 | grep -vF all - -clean: - rm -rf build/ dist/ .eggs/ .tox/ - -force_release: clean - ./build-release.sh - twine upload dist/* - git push --tags origin master diff --git a/README.md b/README.md deleted file mode 100644 index e21f534..0000000 --- a/README.md +++ /dev/null @@ -1,83 +0,0 @@ -## Motivation - -When reading from many possibly large files in a fashion similar to random access, it is usually the fastest and most efficient to use memory maps. - -Although memory maps have many advantages, they represent a very limited system resource as every map uses one file descriptor, whose amount is limited per process. On 32 bit systems, the amount of memory you can have mapped at a time is naturally limited to theoretical 4GB of memory, which may not be enough for some applications. - - -## Limitations - -* **System resources (file-handles) are likely to be leaked!** This is due to the library authors reliance on a deterministic `__del__()` destructor. -* The memory access is read-only by design. 
- - -## Overview - -![Python package](https://github.com/gitpython-developers/smmap/workflows/Python%20package/badge.svg) - -Smmap wraps an interface around mmap and tracks the mapped files as well as the amount of clients who use it. If the system runs out of resources, or if a memory limit is reached, it will automatically unload unused maps to allow continued operation. - -To allow processing large files even on 32 bit systems, it allows only portions of the file to be mapped. Once the user reads beyond the mapped region, smmap will automatically map the next required region, unloading unused regions using a LRU algorithm. - -Although the library can be used most efficiently with its native interface, a Buffer implementation is provided to hide these details behind a simple string-like interface. - -For performance critical 64 bit applications, a simplified version of memory mapping is provided which always maps the whole file, but still provides the benefit of unloading unused mappings on demand. - - - -## Prerequisites - -* Python 3.7+ -* OSX, Windows or Linux - -The package was tested on all of the previously mentioned configurations. - -## Installing smmap - -[![Documentation Status](https://readthedocs.org/projects/smmap/badge/?version=latest)](https://readthedocs.org/projects/smmap/?badge=latest) - -Its easiest to install smmap using the [pip](http://www.pip-installer.org/en/latest) program: - -```bash -$ pip install smmap -``` - -As the command will install smmap in your respective python distribution, you will most likely need root permissions to authorize the required changes. - -If you have downloaded the source archive, the package can be installed by running the `setup.py` script: - -```bash -$ python setup.py install -``` - -It is advised to have a look at the **Usage Guide** for a brief introduction on the different database implementations. 
- - - -## Homepage and Links - -The project is home on github at https://github.com/gitpython-developers/smmap . - -The latest source can be cloned from github as well: - -* git://github.com/gitpython-developers/smmap.git - - -For support, please use the git-python mailing list: - -* http://groups.google.com/group/git-python - - -Issues can be filed on github: - -* https://github.com/gitpython-developers/smmap/issues - -A link to the pypi page related to this repository: - -* https://pypi.org/project/smmap/ - - -## License Information - -*smmap* is licensed under the New BSD License. - diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 9e0c0d1..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,3 +0,0 @@ -# Security Policy - -See [GitPython](https://github.com/gitpython-developers/GitPython/blob/main/SECURITY.md). Vulnerabilities found in `smmap` can be reported there. diff --git a/build-release.sh b/build-release.sh deleted file mode 100755 index 5840e44..0000000 --- a/build-release.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# -# This script builds a release. If run in a venv, it auto-installs its tools. -# You may want to run "make release" instead of running this script directly. - -set -eEu - -function release_with() { - $1 -m build --sdist --wheel -} - -if test -n "${VIRTUAL_ENV:-}"; then - deps=(build twine) # Install twine along with build, as we need it later. - echo "Virtual environment detected. Adding packages: ${deps[*]}" - pip install --quiet --upgrade "${deps[@]}" - echo 'Starting the build.' - release_with python -else - function suggest_venv() { - venv_cmd='python -m venv env && source env/bin/activate' - printf "HELP: To avoid this error, use a virtual-env with '%s' instead.\n" "$venv_cmd" - } - trap suggest_venv ERR # This keeps the original exit (error) code. - echo 'Starting the build.' - release_with python3 # Outside a venv, use python3. 
-fi diff --git a/doc/.gitignore b/doc/.gitignore deleted file mode 100644 index 32060ac..0000000 --- a/doc/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build -*.version_info diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index 675ec20..0000000 --- a/doc/Makefile +++ /dev/null @@ -1,89 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = build - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source - -.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - -clean: - -rm -rf $(BUILDDIR)/* - -html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." 
- -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." - -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/smmap.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/smmap.qhc" - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ - "run these through (pdf)latex." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." 
diff --git a/doc/make.bat b/doc/make.bat deleted file mode 100644 index 6900a2a..0000000 --- a/doc/make.bat +++ /dev/null @@ -1,113 +0,0 @@ -@ECHO OFF - -REM Command file for Sphinx documentation - -set SPHINXBUILD=sphinx-build -set BUILDDIR=build -set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source -if NOT "%PAPER%" == "" ( - set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% -) - -if "%1" == "" goto help - -if "%1" == "help" ( - :help - echo.Please use `make ^` where ^ is one of - echo. html to make standalone HTML files - echo. dirhtml to make HTML files named index.html in directories - echo. pickle to make pickle files - echo. json to make JSON files - echo. htmlhelp to make HTML files and a HTML help project - echo. qthelp to make HTML files and a qthelp project - echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter - echo. changes to make an overview over all changed/added/deprecated items - echo. linkcheck to check all external links for integrity - echo. doctest to run all doctests embedded in the documentation if enabled - goto end -) - -if "%1" == "clean" ( - for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i - del /q /s %BUILDDIR%\* - goto end -) - -if "%1" == "html" ( - %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/html. - goto end -) - -if "%1" == "dirhtml" ( - %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml - echo. - echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. - goto end -) - -if "%1" == "pickle" ( - %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle - echo. - echo.Build finished; now you can process the pickle files. - goto end -) - -if "%1" == "json" ( - %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json - echo. - echo.Build finished; now you can process the JSON files. - goto end -) - -if "%1" == "htmlhelp" ( - %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp - echo. 
- echo.Build finished; now you can run HTML Help Workshop with the ^ -.hhp project file in %BUILDDIR%/htmlhelp. - goto end -) - -if "%1" == "qthelp" ( - %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp - echo. - echo.Build finished; now you can run "qcollectiongenerator" with the ^ -.qhcp project file in %BUILDDIR%/qthelp, like this: - echo.^> qcollectiongenerator %BUILDDIR%\qthelp\smmap.qhcp - echo.To view the help file: - echo.^> assistant -collectionFile %BUILDDIR%\qthelp\smmap.ghc - goto end -) - -if "%1" == "latex" ( - %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex - echo. - echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. - goto end -) - -if "%1" == "changes" ( - %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes - echo. - echo.The overview file is in %BUILDDIR%/changes. - goto end -) - -if "%1" == "linkcheck" ( - %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck - echo. - echo.Link check complete; look for any errors in the above output ^ -or in %BUILDDIR%/linkcheck/output.txt. - goto end -) - -if "%1" == "doctest" ( - %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest - echo. - echo.Testing of doctests in the sources finished, look at the ^ -results in %BUILDDIR%/doctest/output.txt. - goto end -) - -:end diff --git a/doc/source/api.rst b/doc/source/api.rst deleted file mode 100644 index 2e2dac4..0000000 --- a/doc/source/api.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. _api-label: - -############# -API Reference -############# - -*********************** -Mapped Memory Managers -*********************** - -.. automodule:: smmap.mman - :members: - :undoc-members: - -******* -Buffers -******* - -.. automodule:: smmap.buf - :members: - :undoc-members: - -********* -Utilities -********* - -.. 
automodule:: smmap.util - :members: - :undoc-members: - - - - - diff --git a/doc/source/changes.rst b/doc/source/changes.rst deleted file mode 100644 index dc243eb..0000000 --- a/doc/source/changes.rst +++ /dev/null @@ -1,100 +0,0 @@ -######### -Changelog -######### - -****** -v5.0.2 -****** - -- remove a usage of mktemp - -****** -v5.0.1 -****** - -- Added support for Python 3.12 - -****** -v6.0.0 -****** - -YANKED - -- Dropped support 3.6 and 3.7 -- Declared support for Python 3.11 and 3.12 - -****** -v5.0.0 -****** - -- Dropped support 3.5 -- Added support for Python 3.10 - -****** -v4.0.0 -****** - -- Dropped support for Python 2.7 and 3.4 -- Added support for Python 3.7, 3.8, and 3.9 -- Removed unused exc.MemoryManagerError and exc.RegionCollectionError - -****** -v3.0.5 -****** - -- Restored Python 2 support removed in v3.0.2 -- Changed release signature key to 27C50E7F590947D7273A741E85194C08421980C9. - See https://keybase.io/byronbates for proof of ownership. - -****** -v3.0.4 -****** - -- Signed release (with correct key this time) - -****** -v3.0.2 -****** - -- Signed release -- Switched to GitHub Actions for CI - -****** -v3.0.1 -****** -- Switched back to the smmap package name on PyPI and fixed the smmap2 mirror package - (`#44 `_) -- Fixed setup.py ``long_description`` rendering - (`#40 `_) - -********** -v0.9.0 -********** -- Fixed issue with resources never being freed as mmaps were never closed. -- Client counting is now done manually, instead of relying on pyton's reference count - -********** -v0.8.5 -********** -- Fixed Python 3.0-3.3 regression, which also causes smmap to become about 3 times slower depending on the code path. 
It's related to this bug (http://bugs.python.org/issue15958), which was fixed in python 3.4 - -********** -v0.8.4 -********** -- Fixed Python 3 performance regression - -********** -v0.8.3 -********** -- Cleaned up code and assured it works sufficiently well with python 3 - -********** -v0.8.1 -********** -- A single bugfix - -********** -v0.8.0 -********** - -- Initial Release diff --git a/doc/source/conf.py b/doc/source/conf.py deleted file mode 100644 index 55dfc5c..0000000 --- a/doc/source/conf.py +++ /dev/null @@ -1,194 +0,0 @@ -# -# smmap documentation build configuration file, created by -# sphinx-quickstart on Wed Jun 8 15:14:25 2011. -# -# This file is execfile()d with the current directory set to its containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.append(os.path.abspath('../../')) - -# -- General configuration ----------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['.templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. 
-project = 'smmap' -copyright = '2011, Sebastian Thiel' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = '0.8.0' -# The full version, including alpha/beta/rc tags. -release = '0.8.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of documents that shouldn't be included in the build. -#unused_docs = [] - -# List of directories, relative to source directory, that shouldn't be searched -# for source files. -exclude_trees = [] - -# The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - - -# -- Options for HTML output --------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. Major themes that come with -# Sphinx are currently 'default' and 'sphinxdoc'. -html_theme = 'default' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. 
-#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['.static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_use_modindex = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. 
-#html_use_opensearch = '' - -# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = '' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'smmapdoc' - - -# -- Options for LaTeX output -------------------------------------------------- - -# The paper size ('letter' or 'a4'). -#latex_paper_size = 'letter' - -# The font size ('10pt', '11pt' or '12pt'). -#latex_font_size = '10pt' - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass [howto/manual]). -latex_documents = [ - ('index', 'smmap.tex', 'smmap Documentation', - 'Sebastian Thiel', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# Additional stuff for the LaTeX preamble. -#latex_preamble = '' - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_use_modindex = True diff --git a/doc/source/index.rst b/doc/source/index.rst deleted file mode 100644 index d25ef82..0000000 --- a/doc/source/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. smmap documentation master file, created by - sphinx-quickstart on Wed Jun 8 15:14:25 2011. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to smmap's documentation! -================================= -**smmap** is a pure python implementation of a sliding memory map to help unifying memory mapped access on 32 and 64 bit systems and to help managing resources more efficiently. - -Contents: - -.. 
toctree:: - :maxdepth: 2 - - intro - tutorial - api - changes - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` - diff --git a/doc/source/intro.rst b/doc/source/intro.rst deleted file mode 100644 index 109fec2..0000000 --- a/doc/source/intro.rst +++ /dev/null @@ -1,72 +0,0 @@ -########### -Motivation -########### -When reading from many possibly large files in a fashion similar to random access, it is usually the fastest and most efficient to use memory maps. - -Although memory maps have many advantages, they represent a very limited system resource as every map uses one file descriptor, whose amount is limited per process. On 32 bit systems, the amount of memory you can have mapped at a time is naturally limited to theoretical 4GB of memory, which may not be enough for some applications. - -######## -Overview -######## - -Smmap wraps an interface around mmap and tracks the mapped files as well as the amount of clients who use it. If the system runs out of resources, or if a memory limit is reached, it will automatically unload unused maps to allow continued operation. - -To allow processing large files even on 32 bit systems, it allows only portions of the file to be mapped. Once the user reads beyond the mapped region, smmap will automatically map the next required region, unloading unused regions using a LRU algorithm. - -Although the library can be used most efficiently with its native interface, a Buffer implementation is provided to hide these details behind a simple string-like interface. - -For performance critical 64 bit applications, a simplified version of memory mapping is provided which always maps the whole file, but still provides the benefit of unloading unused mappings on demand. - -############# -Prerequisites -############# -* Python 3.7+ -* OSX, Windows or Linux - -The package was tested on all of the previously mentioned configurations. 
- -########### -Limitations -########### -* The memory access is read-only by design. - -################ -Installing smmap -################ -Its easiest to install smmap using the *pip* program:: - - $ pip install smmap - -As the command will install smmap in your respective python distribution, you will most likely need root permissions to authorize the required changes. - -If you have downloaded the source archive, the package can be installed by running the ``setup.py`` script:: - - $ python setup.py install - -It is advised to have a look at the :ref:`Usage Guide ` for a brief introduction on the different database implementations. - -################## -Homepage and Links -################## -The project is home on github at `https://github.com/gitpython-developers/smmap `_. - -The latest source can be cloned from github as well: - - * git://github.com/gitpython-developers/smmap.git - - -For support, please use the git-python mailing list: - - * http://groups.google.com/group/git-python - - -Issues can be filed on github: - - * https://github.com/gitpython-developers/smmap/issues - -################### -License Information -################### -*smmap* is licensed under the New BSD License. - -.. _pip: http://www.pip-installer.org/en/latest/ diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst deleted file mode 100644 index 917b245..0000000 --- a/doc/source/tutorial.rst +++ /dev/null @@ -1,118 +0,0 @@ -.. _tutorial-label: - -########### -Usage Guide -########### -This text briefly introduces you to the basic design decisions and accompanying classes. - -****** -Design -****** -Per application, there is *MemoryManager* which is held as static instance and used throughout the application. It can be configured to keep your resources within certain limits. - -To access mapped regions, you require a cursor. Cursors point to exactly one file and serve as handles into it. As long as it exists, the respective memory region will remain available. 
- -For convenience, a buffer implementation is provided which handles cursors and resource allocation behind its simple buffer like interface. - -*************** -Memory Managers -*************** -There are two types of memory managers, one uses *static* windows, the other one uses *sliding* windows. A window is a region of a file mapped into memory. Although the names might be somewhat misleading as technically windows are always static, the *sliding* version will allocate relatively small windows whereas the *static* version will always map the whole file. - -The *static* manager does nothing more than keeping a client count on the respective memory maps which always map the whole file, which allows to make some assumptions that can lead to simplified data access and increased performance, but reduces the compatibility to 32 bit systems or giant files. - -The *sliding* memory manager therefore should be the default manager when preparing an application for handling huge amounts of data on 32 bit and 64 bit platforms:: - - import smmap - # This instance should be globally available in your application - # It is configured to be well suitable for 32-bit or 64 bit applications. - mman = smmap.SlidingWindowMapManager() - - # the manager provides much useful information about its current state - # like the amount of open file handles or the amount of mapped memory - mman.num_file_handles() - mman.mapped_memory_size() - # and many more ... - - -Cursors -******* -*Cursors* are handles that point onto a window, i.e. a region of a file mapped into memory. From them you may obtain a buffer through which the data of that window can actually be accessed:: - - import smmap.test.lib - fc = smmap.test.lib.FileCreator(1024*1024*8, "test_file") - - # obtain a cursor to access some file. 
- c = mman.make_cursor(fc.path) - - # the cursor is now associated with the file, but not yet usable - assert c.is_associated() - assert not c.is_valid() - - # before you can use the cursor, you have to specify a window you want to - # access. The following just says you want as much data as possible starting - # from offset 0. - # To be sure your region could be mapped, query for validity - assert c.use_region().is_valid() # use_region returns self - - # once a region was mapped, you must query its dimension regularly - # to assure you don't try to access its buffer out of its bounds - assert c.size() - c.buffer()[0] # first byte - c.buffer()[1:10] # first 9 bytes - c.buffer()[c.size()-1] # last byte - - # its recommended not to create big slices when feeding the buffer - # into consumers (e.g. struct or zlib). - # Instead, either give the buffer directly, or use pythons buffer command. - buffer(c.buffer(), 1, 9) # first 9 bytes without copying them - - # you can query absolute offsets, and check whether an offset is included - # in the cursor's data. - assert c.ofs_begin() < c.ofs_end() - assert c.includes_ofs(100) - - # If you are over out of bounds with one of your region requests, the - # cursor will be come invalid. It cannot be used in that state - assert not c.use_region(fc.size, 100).is_valid() - # map as much as possible after skipping the first 100 bytes - assert c.use_region(100).is_valid() - - # You can explicitly free cursor resources by unusing the cursor's region - c.unuse_region() - assert not c.is_valid() - - -Now you would have to write your algorithms around this interface to properly slide through huge amounts of data. - -Alternatively you can use a convenience interface. - -******* -Buffers -******* -To make first use easier, at the expense of performance, there is a Buffer implementation which uses a cursor underneath. 
- -With it, you can access all data in a possibly huge file without having to take care of setting the cursor to different regions yourself:: - - # Create a default buffer which can operate on the whole file - buf = smmap.SlidingWindowMapBuffer(mman.make_cursor(fc.path)) - - # you can use it right away - assert buf.cursor().is_valid() - - buf[0] # access the first byte - buf[-1] # access the last byte of the file - buf[-10:] # access the last ten bytes - - # If you want to keep the instance between different accesses, use the - # dedicated methods - buf.end_access() - assert not buf.cursor().is_valid() # you cannot use the buffer anymore - assert buf.begin_access(offset=10) # start using the buffer at an offset - - # it will stop using resources automatically once it goes out of scope - -Disadvantages -************* -Buffers cannot be used in place of strings or maps, hence you have to slice them to obtain valid input for the likes of struct and zlib. A slice means a lot of data handling overhead, which makes buffers slower compared to using cursors directly.
- diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 38c535b..0000000 --- a/setup.cfg +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -exclude = .tox,.venv,build,dist,doc diff --git a/setup.py b/setup.py old mode 100755 new mode 100644 index 7deb178..b3e3990 --- a/setup.py +++ b/setup.py @@ -1,52 +1,12 @@ -#!/usr/bin/env python -import os - -try: - from setuptools import setup, find_packages -except ImportError: - from ez_setup import use_setuptools - use_setuptools() - from setuptools import setup, find_packages - -import smmap - -if os.path.exists("README.md"): - long_description = open('README.md', encoding="utf-8").read().replace('\r\n', '\n') -else: - long_description = "See https://github.com/gitpython-developers/smmap" +from setuptools import setup setup( - name="smmap", - version=smmap.__version__, - description="A pure Python implementation of a sliding window memory map manager", - author=smmap.__author__, - author_email=smmap.__contact__, - url=smmap.__homepage__, - platforms=["any"], - license="BSD-3-Clause", - packages=find_packages(), - zip_safe=True, - python_requires=">=3.7", - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", - "Operating System :: OS Independent", - "Operating System :: POSIX", - "Operating System :: Microsoft :: Windows", - "Operating System :: MacOS :: MacOS X", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: 3.13", - "Programming Language :: Python :: 3 :: Only", - ], - long_description=long_description, - long_description_content_type='text/markdown', + name="smmap2", + 
version="3.0.1", + author="Sebastian Thiel", + author_email="byronimo@gmail.com", + description="A mirror package for smmap", + long_description="This is a mirror package for `smmap `_. Consider installing it directly instead.", + url="https://github.com/gitpython-developers/smmap", + install_requires=["smmap>=3.0.1"], ) diff --git a/smmap/__init__.py b/smmap/__init__.py deleted file mode 100644 index 657c8c5..0000000 --- a/smmap/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -"""Initialize the smmap package""" - -__author__ = "Sebastian Thiel" -__contact__ = "byronimo@gmail.com" -__homepage__ = "https://github.com/gitpython-developers/smmap" -version_info = (5, 0, 2) -__version__ = '.'.join(str(i) for i in version_info) - -# make everything available in root package for convenience -from .mman import * -from .buf import * diff --git a/smmap/buf.py b/smmap/buf.py deleted file mode 100644 index 731b064..0000000 --- a/smmap/buf.py +++ /dev/null @@ -1,143 +0,0 @@ -"""Module with a simple buffer implementation using the memory manager""" -import sys - -__all__ = ["SlidingWindowMapBuffer"] - - -class SlidingWindowMapBuffer: - - """A buffer like object which allows direct byte-wise object and slicing into - memory of a mapped file. The mapping is controlled by the provided cursor. - - The buffer is relative, that is if you map an offset, index 0 will map to the - first byte at the offset you used during initialization or begin_access - - **Note:** Although this type effectively hides the fact that there are mapped windows - underneath, it can unfortunately not be used in any non-pure python method which - needs a buffer or string""" - __slots__ = ( - '_c', # our cursor - '_size', # our supposed size - ) - - def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0): - """Initialize the instance to operate on the given cursor. 
- :param cursor: if not None, the associated cursor to the file you want to access - If None, you have call begin_access before using the buffer and provide a cursor - :param offset: absolute offset in bytes - :param size: the total size of the mapping. Defaults to the maximum possible size - From that point on, the __len__ of the buffer will be the given size or the file size. - If the size is larger than the mappable area, you can only access the actually available - area, although the length of the buffer is reported to be your given size. - Hence it is in your own interest to provide a proper size ! - :param flags: Additional flags to be passed to os.open - :raise ValueError: if the buffer could not achieve a valid state""" - self._c = cursor - if cursor and not self.begin_access(cursor, offset, size, flags): - raise ValueError("Failed to allocate the buffer - probably the given offset is out of bounds") - # END handle offset - - def __del__(self): - self.end_access() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.end_access() - - def __len__(self): - return self._size - - def __getitem__(self, i): - if isinstance(i, slice): - return self.__getslice__(i.start or 0, i.stop or self._size) - c = self._c - assert c.is_valid() - if i < 0: - i = self._size + i - if not c.includes_ofs(i): - c.use_region(i, 1) - # END handle region usage - return c.buffer()[i - c.ofs_begin()] - - def __getslice__(self, i, j): - c = self._c - # fast path, slice fully included - safes a concatenate operation and - # should be the default - assert c.is_valid() - if i < 0: - i = self._size + i - if j == sys.maxsize: - j = self._size - if j < 0: - j = self._size + j - if (c.ofs_begin() <= i) and (j < c.ofs_end()): - b = c.ofs_begin() - return c.buffer()[i - b:j - b] - else: - l = j - i # total length - ofs = i - # It's fastest to keep tokens and join later, especially in py3, which was 7 times slower - # in the previous iteration of 
this code - md = list() - while l: - c.use_region(ofs, l) - assert c.is_valid() - d = c.buffer()[:l] - ofs += len(d) - l -= len(d) - # Make sure we don't keep references, as c.use_region() might attempt to free resources, but - # can't unless we use pure bytes - if hasattr(d, 'tobytes'): - d = d.tobytes() - md.append(d) - # END while there are bytes to read - return b''.join(md) - # END fast or slow path - #{ Interface - - def begin_access(self, cursor=None, offset=0, size=sys.maxsize, flags=0): - """Call this before the first use of this instance. The method was already - called by the constructor in case sufficient information was provided. - - For more information no the parameters, see the __init__ method - :param path: if cursor is None the existing one will be used. - :return: True if the buffer can be used""" - if cursor: - self._c = cursor - # END update our cursor - - # reuse existing cursors if possible - if self._c is not None and self._c.is_associated(): - res = self._c.use_region(offset, size, flags).is_valid() - if res: - # if given size is too large or default, we computer a proper size - # If its smaller, we assume the combination between offset and size - # as chosen by the user is correct and use it ! - # If not, the user is in trouble. - if size > self._c.file_size(): - size = self._c.file_size() - offset - # END handle size - self._size = size - # END set size - return res - # END use our cursor - return False - - def end_access(self): - """Call this method once you are done using the instance. It is automatically - called on destruction, and should be called just in time to allow system - resources to be freed. 
- - Once you called end_access, you must call begin access before reusing this instance!""" - self._size = 0 - if self._c is not None: - self._c.unuse_region() - # END unuse region - - def cursor(self): - """:return: the currently set cursor which provides access to the data""" - return self._c - - #}END interface diff --git a/smmap/mman.py b/smmap/mman.py deleted file mode 100644 index 873f687..0000000 --- a/smmap/mman.py +++ /dev/null @@ -1,588 +0,0 @@ -"""Module containing a memory memory manager which provides a sliding window on a number of memory mapped files""" -from .util import ( - MapWindow, - MapRegion, - MapRegionList, - is_64_bit, -) - -import sys -from functools import reduce - -__all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"] -#{ Utilities - -#}END utilities - - -class WindowCursor: - - """ - Pointer into the mapped region of the memory manager, keeping the map - alive until it is destroyed and no other client uses it. - - Cursors should not be created manually, but are instead returned by the SlidingWindowMapManager - - **Note:**: The current implementation is suited for static and sliding window managers, but it also means - that it must be suited for the somewhat quite different sliding manager. 
It could be improved, but - I see no real need to do so.""" - __slots__ = ( - '_manager', # the manager keeping all file regions - '_rlist', # a regions list with regions for our file - '_region', # our current class:`MapRegion` or None - '_ofs', # relative offset from the actually mapped area to our start area - '_size' # maximum size we should provide - ) - - def __init__(self, manager=None, regions=None): - self._manager = manager - self._rlist = regions - self._region = None - self._ofs = 0 - self._size = 0 - - def __del__(self): - self._destroy() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._destroy() - - def _destroy(self): - """Destruction code to decrement counters""" - self.unuse_region() - - if self._rlist is not None: - # Actual client count, which doesn't include the reference kept by the manager, nor ours - # as we are about to be deleted - try: - if len(self._rlist) == 0: - # Free all resources associated with the mapped file - self._manager._fdict.pop(self._rlist.path_or_fd()) - # END remove regions list from manager - except (TypeError, KeyError): - # sometimes, during shutdown, getrefcount is None. Its possible - # to re-import it, however, its probably better to just ignore - # this python problem (for now). - # The next step is to get rid of the error prone getrefcount altogether. 
- pass - # END exception handling - # END handle regions - - def _copy_from(self, rhs): - """Copy all data from rhs into this instance, handles usage count""" - self._manager = rhs._manager - self._rlist = type(rhs._rlist)(rhs._rlist) - self._region = rhs._region - self._ofs = rhs._ofs - self._size = rhs._size - - for region in self._rlist: - region.increment_client_count() - - if self._region is not None: - self._region.increment_client_count() - # END handle regions - - def __copy__(self): - """copy module interface""" - cpy = type(self)() - cpy._copy_from(self) - return cpy - - #{ Interface - def assign(self, rhs): - """Assign rhs to this instance. This is required in order to get a real copy. - Alternatively, you can copy an existing instance using the copy module""" - self._destroy() - self._copy_from(rhs) - - def use_region(self, offset=0, size=0, flags=0): - """Assure we point to a window which allows access to the given offset into the file - - :param offset: absolute offset in bytes into the file - :param size: amount of bytes to map. If 0, all available bytes will be mapped - :param flags: additional flags to be given to os.open in case a file handle is initially opened - for mapping. Has no effect if a region can actually be reused. - :return: this instance - it should be queried for whether it points to a valid memory region. - This is not the case if the mapping failed because we reached the end of the file - - **Note:**: The size actually mapped may be smaller than the given size. 
If that is the case, - either the file has reached its end, or the map was created between two existing regions""" - need_region = True - man = self._manager - fsize = self._rlist.file_size() - size = min(size or fsize, man.window_size() or fsize) # clamp size to window size - - if self._region is not None: - if self._region.includes_ofs(offset): - need_region = False - else: - self.unuse_region() - # END handle existing region - # END check existing region - - # offset too large ? - if offset >= fsize: - return self - # END handle offset - - if need_region: - self._region = man._obtain_region(self._rlist, offset, size, flags, False) - self._region.increment_client_count() - # END need region handling - - self._ofs = offset - self._region._b - self._size = min(size, self._region.ofs_end() - offset) - - return self - - def unuse_region(self): - """Unuse the current region. Does nothing if we have no current region - - **Note:** the cursor unuses the region automatically upon destruction. It is recommended - to un-use the region once you are done reading from it in persistent cursors as it - helps to free up resource more quickly""" - if self._region is not None: - self._region.increment_client_count(-1) - self._region = None - # note: should reset ofs and size, but we spare that for performance. Its not - # allowed to query information if we are not valid ! - - def buffer(self): - """Return a buffer object which allows access to our memory region from our offset - to the window size. Please note that it might be smaller than you requested when calling use_region() - - **Note:** You can only obtain a buffer if this instance is_valid() ! - - **Note:** buffers should not be cached passed the duration of your access as it will - prevent resources from being freed even though they might not be accounted for anymore !""" - return memoryview(self._region.buffer())[self._ofs:self._ofs+self._size] - - def map(self): - """ - :return: the underlying raw memory map. 
Please not that the offset and size is likely to be different - to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole - file in case of StaticWindowMapManager""" - return self._region.map() - - def is_valid(self): - """:return: True if we have a valid and usable region""" - return self._region is not None - - def is_associated(self): - """:return: True if we are associated with a specific file already""" - return self._rlist is not None - - def ofs_begin(self): - """:return: offset to the first byte pointed to by our cursor - - **Note:** only if is_valid() is True""" - return self._region._b + self._ofs - - def ofs_end(self): - """:return: offset to one past the last available byte""" - # unroll method calls for performance ! - return self._region._b + self._ofs + self._size - - def size(self): - """:return: amount of bytes we point to""" - return self._size - - def region(self): - """:return: our mapped region, or None if nothing is mapped yet - :raise AssertionError: if we have no current region. 
This is only useful for debugging""" - return self._region - - def includes_ofs(self, ofs): - """:return: True if the given absolute offset is contained in the cursors - current region - - **Note:** cursor must be valid for this to work""" - # unroll methods - return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size) - - def file_size(self): - """:return: size of the underlying file""" - return self._rlist.file_size() - - def path_or_fd(self): - """:return: path or file descriptor of the underlying mapped file""" - return self._rlist.path_or_fd() - - def path(self): - """:return: path of the underlying mapped file - :raise ValueError: if attached path is not a path""" - if isinstance(self._rlist.path_or_fd(), int): - raise ValueError("Path queried although mapping was applied to a file descriptor") - # END handle type - return self._rlist.path_or_fd() - - def fd(self): - """:return: file descriptor used to create the underlying mapping. - - **Note:** it is not required to be valid anymore - :raise ValueError: if the mapping was not created by a file descriptor""" - if isinstance(self._rlist.path_or_fd(), str): - raise ValueError("File descriptor queried although mapping was generated from path") - # END handle type - return self._rlist.path_or_fd() - - #} END interface - - -class StaticWindowMapManager: - - """Provides a manager which will produce single size cursors that are allowed - to always map the whole file. - - Clients must be written to specifically know that they are accessing their data - through a StaticWindowMapManager, as they otherwise have to deal with their window size. - - These clients would have to use a SlidingWindowMapBuffer to hide this fact. 
- - This type will always use a maximum window size, and optimize certain methods to - accommodate this fact""" - - __slots__ = [ - '_fdict', # mapping of path -> StorageHelper (of some kind - '_window_size', # maximum size of a window - '_max_memory_size', # maximum amount of memory we may allocate - '_max_handle_count', # maximum amount of handles to keep open - '_memory_size', # currently allocated memory size - '_handle_count', # amount of currently allocated file handles - ] - - #{ Configuration - MapRegionListCls = MapRegionList - MapWindowCls = MapWindow - MapRegionCls = MapRegion - WindowCursorCls = WindowCursor - #} END configuration - - _MB_in_bytes = 1024 * 1024 - - def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize): - """initialize the manager with the given parameters. - :param window_size: if -1, a default window size will be chosen depending on - the operating system's architecture. It will internally be quantified to a multiple of the page size - If 0, the window may have any size, which basically results in mapping the whole file at one - :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions. - If 0, a viable default will be set depending on the system's architecture. - It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate - :param max_open_handles: if not maxint, limit the amount of open file handles to the given number. - Otherwise the amount is only limited by the system itself. 
If a system or soft limit is hit, - the manager will free as many handles as possible""" - self._fdict = dict() - self._window_size = window_size - self._max_memory_size = max_memory_size - self._max_handle_count = max_open_handles - self._memory_size = 0 - self._handle_count = 0 - - if window_size < 0: - coeff = 64 - if is_64_bit(): - coeff = 1024 - # END handle arch - self._window_size = coeff * self._MB_in_bytes - # END handle max window size - - if max_memory_size == 0: - coeff = 1024 - if is_64_bit(): - coeff = 8192 - # END handle arch - self._max_memory_size = coeff * self._MB_in_bytes - # END handle max memory size - - #{ Internal Methods - - def _collect_lru_region(self, size): - """Unmap the region which was least-recently used and has no client - :param size: size of the region we want to map next (assuming its not already mapped partially or full - if 0, we try to free any available region - :return: Amount of freed regions - - .. Note:: - We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation. - If the system runs out of memory, it will tell. - - .. TODO:: - implement a case where all unusued regions are discarded efficiently. 
- Currently its only brute force - """ - num_found = 0 - while (size == 0) or (self._memory_size + size > self._max_memory_size): - lru_region = None - lru_list = None - for regions in self._fdict.values(): - for region in regions: - # check client count - if it's 1, it's just us - if (region.client_count() == 1 and - (lru_region is None or region._uc < lru_region._uc)): - lru_region = region - lru_list = regions - # END update lru_region - # END for each region - # END for each regions list - - if lru_region is None: - break - # END handle region not found - - num_found += 1 - del(lru_list[lru_list.index(lru_region)]) - lru_region.increment_client_count(-1) - self._memory_size -= lru_region.size() - self._handle_count -= 1 - # END while there is more memory to free - return num_found - - def _obtain_region(self, a, offset, size, flags, is_recursive): - """Utility to create a new region - for more information on the parameters, - see MapCursor.use_region. - :param a: A regions (a)rray - :return: The newly created region""" - if self._memory_size + size > self._max_memory_size: - self._collect_lru_region(size) - # END handle collection - - r = None - if a: - assert len(a) == 1 - r = a[0] - else: - try: - r = self.MapRegionCls(a.path_or_fd(), 0, sys.maxsize, flags) - except Exception: - # apparently we are out of system resources or hit a limit - # As many more operations are likely to fail in that condition ( - # like reading a file from disk, etc) we free up as much as possible - # As this invalidates our insert position, we have to recurse here - if is_recursive: - # we already tried this, and still have no success in obtaining - # a mapping. 
This is an exception, so we propagate it - raise - # END handle existing recursion - self._collect_lru_region(0) - return self._obtain_region(a, offset, size, flags, True) - # END handle exceptions - - self._handle_count += 1 - self._memory_size += r.size() - a.append(r) - # END handle array - - assert r.includes_ofs(offset) - return r - - #}END internal methods - - #{ Interface - def make_cursor(self, path_or_fd): - """ - :return: a cursor pointing to the given path or file descriptor. - It can be used to map new regions of the file into memory - - **Note:** if a file descriptor is given, it is assumed to be open and valid, - but may be closed afterwards. To refer to the same file, you may reuse - your existing file descriptor, but keep in mind that new windows can only - be mapped as long as it stays valid. This is why the using actual file paths - are preferred unless you plan to keep the file descriptor open. - - **Note:** file descriptors are problematic as they are not necessarily unique, as two - different files opened and closed in succession might have the same file descriptor id. - - **Note:** Using file descriptors directly is faster once new windows are mapped as it - prevents the file to be opened again just for the purpose of mapping it.""" - regions = self._fdict.get(path_or_fd) - if regions is None: - regions = self.MapRegionListCls(path_or_fd) - self._fdict[path_or_fd] = regions - # END obtain region for path - return self.WindowCursorCls(self, regions) - - def collect(self): - """Collect all available free-to-collect mapped regions - :return: Amount of freed handles""" - return self._collect_lru_region(0) - - def num_file_handles(self): - """:return: amount of file handles in use. 
Each mapped region uses one file handle""" - return self._handle_count - - def num_open_files(self): - """Amount of opened files in the system""" - return reduce(lambda x, y: x + y, (1 for rlist in self._fdict.values() if len(rlist) > 0), 0) - - def window_size(self): - """:return: size of each window when allocating new regions""" - return self._window_size - - def mapped_memory_size(self): - """:return: amount of bytes currently mapped in total""" - return self._memory_size - - def max_file_handles(self): - """:return: maximum amount of handles we may have opened""" - return self._max_handle_count - - def max_mapped_memory_size(self): - """:return: maximum amount of memory we may allocate""" - return self._max_memory_size - - #} END interface - - #{ Special Purpose Interface - - def force_map_handle_removal_win(self, base_path): - """ONLY AVAILABLE ON WINDOWS - On windows removing files is not allowed if anybody still has it opened. - If this process is ourselves, and if the whole process uses this memory - manager (as far as the parent framework is concerned) we can enforce - closing all memory maps whose path matches the given base path to - allow the respective operation after all. - The respective system must NOT access the closed memory regions anymore ! - This really may only be used if you know that the items which keep - the cursors alive will not be using it anymore. They need to be recreated ! 
- :return: Amount of closed handles - - **Note:** does nothing on non-windows platforms""" - if sys.platform != 'win32': - return - # END early bailout - - num_closed = 0 - for path, rlist in self._fdict.items(): - if path.startswith(base_path): - for region in rlist: - region.release() - num_closed += 1 - # END path matches - # END for each path - return num_closed - #} END special purpose interface - - -class SlidingWindowMapManager(StaticWindowMapManager): - - """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily - obtain additional regions assuring there is no overlap. - Once a certain memory limit is reached globally, or if there cannot be more open file handles - which result from each mmap call, the least recently used, and currently unused mapped regions - are unloaded automatically. - - **Note:** currently not thread-safe ! - - **Note:** in the current implementation, we will automatically unload windows if we either cannot - create more memory maps (as the open file handles limit is hit) or if we have allocated more than - a safe amount of memory already, which would possibly cause memory allocations to fail as our address - space is full.""" - - __slots__ = tuple() - - def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize): - """Adjusts the default window size to -1""" - super().__init__(window_size, max_memory_size, max_open_handles) - - def _obtain_region(self, a, offset, size, flags, is_recursive): - # bisect to find an existing region. The c++ implementation cannot - # do that as it uses a linked list for regions. 
- r = None - lo = 0 - hi = len(a) - while lo < hi: - mid = (lo + hi) // 2 - ofs = a[mid]._b - if ofs <= offset: - if a[mid].includes_ofs(offset): - r = a[mid] - break - # END have region - lo = mid + 1 - else: - hi = mid - # END handle position - # END while bisecting - - if r is None: - window_size = self._window_size - left = self.MapWindowCls(0, 0) - mid = self.MapWindowCls(offset, size) - right = self.MapWindowCls(a.file_size(), 0) - - # we want to honor the max memory size, and assure we have anough - # memory available - # Save calls ! - if self._memory_size + window_size > self._max_memory_size: - self._collect_lru_region(window_size) - # END handle collection - - # we assume the list remains sorted by offset - insert_pos = 0 - len_regions = len(a) - if len_regions == 1: - if a[0]._b <= offset: - insert_pos = 1 - # END maintain sort - else: - # find insert position - insert_pos = len_regions - for i, region in enumerate(a): - if region._b > offset: - insert_pos = i - break - # END if insert position is correct - # END for each region - # END obtain insert pos - - # adjust the actual offset and size values to create the largest - # possible mapping - if insert_pos == 0: - if len_regions: - right = self.MapWindowCls.from_region(a[insert_pos]) - # END adjust right side - else: - if insert_pos != len_regions: - right = self.MapWindowCls.from_region(a[insert_pos]) - # END adjust right window - left = self.MapWindowCls.from_region(a[insert_pos - 1]) - # END adjust surrounding windows - - mid.extend_left_to(left, window_size) - mid.extend_right_to(right, window_size) - mid.align() - - # it can happen that we align beyond the end of the file - if mid.ofs_end() > right.ofs: - mid.size = right.ofs - mid.ofs - # END readjust size - - # insert new region at the right offset to keep the order - try: - if self._handle_count >= self._max_handle_count: - raise Exception - # END assert own imposed max file handles - r = self.MapRegionCls(a.path_or_fd(), mid.ofs, mid.size, 
flags) - except Exception: - # apparently we are out of system resources or hit a limit - # As many more operations are likely to fail in that condition ( - # like reading a file from disk, etc) we free up as much as possible - # As this invalidates our insert position, we have to recurse here - if is_recursive: - # we already tried this, and still have no success in obtaining - # a mapping. This is an exception, so we propagate it - raise - # END handle existing recursion - self._collect_lru_region(0) - return self._obtain_region(a, offset, size, flags, True) - # END handle exceptions - - self._handle_count += 1 - self._memory_size += r.size() - a.insert(insert_pos, r) - # END create new region - return r diff --git a/smmap/test/__init__.py b/smmap/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/smmap/test/lib.py b/smmap/test/lib.py deleted file mode 100644 index b15b0ec..0000000 --- a/smmap/test/lib.py +++ /dev/null @@ -1,72 +0,0 @@ -"""Provide base classes for the test system""" -from unittest import TestCase -import os -import tempfile - -__all__ = ['TestBase', 'FileCreator'] - - -#{ Utilities - -class FileCreator: - - """A instance which creates a temporary file with a prefix and a given size - and provides this info to the user. 
- Once it gets deleted, it will remove the temporary file as well.""" - __slots__ = ("_size", "_path") - - def __init__(self, size, prefix=''): - assert size, "Require size to be larger 0" - - self._size = size - - with tempfile.NamedTemporaryFile("wb", prefix=prefix, delete=False) as file: - self._path = file.name - file.seek(size - 1) - file.write(b'1') - - assert os.path.getsize(self.path) == size - - def __del__(self): - try: - os.remove(self.path) - except OSError: - pass - # END exception handling - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.__del__() - - @property - def path(self): - return self._path - - @property - def size(self): - return self._size - -#} END utilities - - -class TestBase(TestCase): - - """Foundation used by all tests""" - - #{ Configuration - k_window_test_size = 1000 * 1000 * 8 + 5195 - #} END configuration - - #{ Overrides - @classmethod - def setUpAll(cls): - # nothing for now - pass - - # END overrides - - #{ Interface - - #} END interface diff --git a/smmap/test/test_buf.py b/smmap/test/test_buf.py deleted file mode 100644 index f0a86fb..0000000 --- a/smmap/test/test_buf.py +++ /dev/null @@ -1,126 +0,0 @@ -from .lib import TestBase, FileCreator - -from smmap.mman import ( - SlidingWindowMapManager, - StaticWindowMapManager -) -from smmap.buf import SlidingWindowMapBuffer - -from random import randint -from time import time -import sys -import os - - -man_optimal = SlidingWindowMapManager() -man_worst_case = SlidingWindowMapManager( - window_size=TestBase.k_window_test_size // 100, - max_memory_size=TestBase.k_window_test_size // 3, - max_open_handles=15) -static_man = StaticWindowMapManager() - - -class TestBuf(TestBase): - - def test_basics(self): - with FileCreator(self.k_window_test_size, "buffer_test") as fc: - - # invalid paths fail upon construction - c = man_optimal.make_cursor(fc.path) - self.assertRaises(ValueError, SlidingWindowMapBuffer, type(c)()) # invalid cursor 
- self.assertRaises(ValueError, SlidingWindowMapBuffer, c, fc.size) # offset too large - - buf = SlidingWindowMapBuffer() # can create uninitialized buffers - assert buf.cursor() is None - - # can call end access any time - buf.end_access() - buf.end_access() - assert len(buf) == 0 - - # begin access can revive it, if the offset is suitable - offset = 100 - assert buf.begin_access(c, fc.size) == False - assert buf.begin_access(c, offset) == True - assert len(buf) == fc.size - offset - assert buf.cursor().is_valid() - - # empty begin access keeps it valid on the same path, but alters the offset - assert buf.begin_access() == True - assert len(buf) == fc.size - assert buf.cursor().is_valid() - - # simple access - with open(fc.path, 'rb') as fp: - data = fp.read() - assert data[offset] == buf[0] - assert data[offset:offset * 2] == buf[0:offset] - - # negative indices, partial slices - assert buf[-1] == buf[len(buf) - 1] - assert buf[-10:] == buf[len(buf) - 10:len(buf)] - - # end access makes its cursor invalid - buf.end_access() - assert not buf.cursor().is_valid() - assert buf.cursor().is_associated() # but it remains associated - - # an empty begin access fixes it up again - assert buf.begin_access() == True and buf.cursor().is_valid() - del(buf) # ends access automatically - del(c) - - assert man_optimal.num_file_handles() == 1 - - # PERFORMANCE - # blast away with random access and a full mapping - we don't want to - # exaggerate the manager's overhead, but measure the buffer overhead - # We do it once with an optimal setting, and with a worse manager which - # will produce small mappings only ! 
- max_num_accesses = 100 - fd = os.open(fc.path, os.O_RDONLY) - for item in (fc.path, fd): - for manager, man_id in ((man_optimal, 'optimal'), - (man_worst_case, 'worst case'), - (static_man, 'static optimal')): - buf = SlidingWindowMapBuffer(manager.make_cursor(item)) - assert manager.num_file_handles() == 1 - for access_mode in range(2): # single, multi - num_accesses_left = max_num_accesses - num_bytes = 0 - fsize = fc.size - - st = time() - buf.begin_access() - while num_accesses_left: - num_accesses_left -= 1 - if access_mode: # multi - ofs_start = randint(0, fsize) - ofs_end = randint(ofs_start, fsize) - d = buf[ofs_start:ofs_end] - assert len(d) == ofs_end - ofs_start - assert d == data[ofs_start:ofs_end] - num_bytes += len(d) - del d - else: - pos = randint(0, fsize) - assert buf[pos] == data[pos] - num_bytes += 1 - # END handle mode - # END handle num accesses - - buf.end_access() - assert manager.num_file_handles() - assert manager.collect() - assert manager.num_file_handles() == 0 - elapsed = max(time() - st, 0.001) # prevent zero division errors on windows - mb = float(1000 * 1000) - mode_str = (access_mode and "slice") or "single byte" - print("%s: Made %i random %s accesses to buffer created from %s reading a total of %f mb in %f s (%f mb/s)" - % (man_id, max_num_accesses, mode_str, type(item), num_bytes / mb, elapsed, (num_bytes / mb) / elapsed), - file=sys.stderr) - # END handle access mode - del buf - # END for each manager - # END for each input - os.close(fd) diff --git a/smmap/test/test_mman.py b/smmap/test/test_mman.py deleted file mode 100644 index 7a5f409..0000000 --- a/smmap/test/test_mman.py +++ /dev/null @@ -1,224 +0,0 @@ -from .lib import TestBase, FileCreator - -from smmap.mman import ( - WindowCursor, - SlidingWindowMapManager, - StaticWindowMapManager -) -from smmap.util import align_to_mmap - -from random import randint -from time import time -import os -import sys -from copy import copy - - -class TestMMan(TestBase): - - def 
test_cursor(self): - with FileCreator(self.k_window_test_size, "cursor_test") as fc: - man = SlidingWindowMapManager() - ci = WindowCursor(man) # invalid cursor - assert not ci.is_valid() - assert not ci.is_associated() - assert ci.size() == 0 # this is cached, so we can query it in invalid state - - cv = man.make_cursor(fc.path) - assert not cv.is_valid() # no region mapped yet - assert cv.is_associated() # but it know where to map it from - assert cv.file_size() == fc.size - assert cv.path() == fc.path - - # copy module - cio = copy(cv) - assert not cio.is_valid() and cio.is_associated() - - # assign method - assert not ci.is_associated() - ci.assign(cv) - assert not ci.is_valid() and ci.is_associated() - - # unuse non-existing region is fine - cv.unuse_region() - cv.unuse_region() - - # destruction is fine (even multiple times) - cv._destroy() - WindowCursor(man)._destroy() - - def test_memory_manager(self): - slide_man = SlidingWindowMapManager() - static_man = StaticWindowMapManager() - - for man in (static_man, slide_man): - assert man.num_file_handles() == 0 - assert man.num_open_files() == 0 - winsize_cmp_val = 0 - if isinstance(man, StaticWindowMapManager): - winsize_cmp_val = -1 - # END handle window size - assert man.window_size() > winsize_cmp_val - assert man.mapped_memory_size() == 0 - assert man.max_mapped_memory_size() > 0 - - # collection doesn't raise in 'any' mode - man._collect_lru_region(0) - # doesn't raise if we are within the limit - man._collect_lru_region(10) - - # doesn't fail if we over-allocate - assert man._collect_lru_region(sys.maxsize) == 0 - - # use a region, verify most basic functionality - with FileCreator(self.k_window_test_size, "manager_test") as fc: - fd = os.open(fc.path, os.O_RDONLY) - try: - for item in (fc.path, fd): - c = man.make_cursor(item) - assert c.path_or_fd() is item - assert c.use_region(10, 10).is_valid() - assert c.ofs_begin() == 10 - assert c.size() == 10 - with open(fc.path, 'rb') as fp: - assert 
c.buffer()[:] == fp.read(20)[10:] - - if isinstance(item, int): - self.assertRaises(ValueError, c.path) - else: - self.assertRaises(ValueError, c.fd) - # END handle value error - # END for each input - finally: - os.close(fd) - # END for each manasger type - - def test_memman_operation(self): - # test more access, force it to actually unmap regions - with FileCreator(self.k_window_test_size, "manager_operation_test") as fc: - with open(fc.path, 'rb') as fp: - data = fp.read() - fd = os.open(fc.path, os.O_RDONLY) - try: - max_num_handles = 15 - # small_size = - for mtype, args in ((StaticWindowMapManager, (0, fc.size // 3, max_num_handles)), - (SlidingWindowMapManager, (fc.size // 100, fc.size // 3, max_num_handles)),): - for item in (fc.path, fd): - assert len(data) == fc.size - - # small windows, a reasonable max memory. Not too many regions at once - man = mtype(window_size=args[0], max_memory_size=args[1], max_open_handles=args[2]) - c = man.make_cursor(item) - - # still empty (more about that is tested in test_memory_manager() - assert man.num_open_files() == 0 - assert man.mapped_memory_size() == 0 - - base_offset = 5000 - # window size is 0 for static managers, hence size will be 0. 
We take that into consideration - size = man.window_size() // 2 - assert c.use_region(base_offset, size).is_valid() - rr = c.region() - assert rr.client_count() == 2 # the manager and the cursor and us - - assert man.num_open_files() == 1 - assert man.num_file_handles() == 1 - assert man.mapped_memory_size() == rr.size() - - # assert c.size() == size # the cursor may overallocate in its static version - assert c.ofs_begin() == base_offset - assert rr.ofs_begin() == 0 # it was aligned and expanded - if man.window_size(): - # but isn't larger than the max window (aligned) - assert rr.size() == align_to_mmap(man.window_size(), True) - else: - assert rr.size() == fc.size - # END ignore static managers which dont use windows and are aligned to file boundaries - - assert c.buffer()[:] == data[base_offset:base_offset + (size or c.size())] - - # obtain second window, which spans the first part of the file - it is a still the same window - nsize = (size or fc.size) - 10 - assert c.use_region(0, nsize).is_valid() - assert c.region() == rr - assert man.num_file_handles() == 1 - assert c.size() == nsize - assert c.ofs_begin() == 0 - assert c.buffer()[:] == data[:nsize] - - # map some part at the end, our requested size cannot be kept - overshoot = 4000 - base_offset = fc.size - (size or c.size()) + overshoot - assert c.use_region(base_offset, size).is_valid() - if man.window_size(): - assert man.num_file_handles() == 2 - assert c.size() < size - assert c.region() is not rr # old region is still available, but has not cursor ref anymore - assert rr.client_count() == 1 # only held by manager - else: - assert c.size() < fc.size - # END ignore static managers which only have one handle per file - rr = c.region() - assert rr.client_count() == 2 # manager + cursor - assert rr.ofs_begin() < c.ofs_begin() # it should have extended itself to the left - assert rr.ofs_end() <= fc.size # it cannot be larger than the file - assert c.buffer()[:] == data[base_offset:base_offset + (size or 
c.size())] - - # unising a region makes the cursor invalid - c.unuse_region() - assert not c.is_valid() - if man.window_size(): - # but doesn't change anything regarding the handle count - we cache it and only - # remove mapped regions if we have to - assert man.num_file_handles() == 2 - # END ignore this for static managers - - # iterate through the windows, verify data contents - # this will trigger map collection after a while - max_random_accesses = 5000 - num_random_accesses = max_random_accesses - memory_read = 0 - st = time() - - # cache everything to get some more performance - includes_ofs = c.includes_ofs - max_mapped_memory_size = man.max_mapped_memory_size() - max_file_handles = man.max_file_handles() - mapped_memory_size = man.mapped_memory_size - num_file_handles = man.num_file_handles - while num_random_accesses: - num_random_accesses -= 1 - base_offset = randint(0, fc.size - 1) - - # precondition - if man.window_size(): - assert max_mapped_memory_size >= mapped_memory_size() - # END statistics will overshoot, which is fine - assert max_file_handles >= num_file_handles() - assert c.use_region(base_offset, (size or c.size())).is_valid() - csize = c.size() - assert c.buffer()[:] == data[base_offset:base_offset + csize] - memory_read += csize - - assert includes_ofs(base_offset) - assert includes_ofs(base_offset + csize - 1) - assert not includes_ofs(base_offset + csize) - # END while we should do an access - elapsed = max(time() - st, 0.001) # prevent zero division errors on windows - mb = float(1000 * 1000) - print("%s: Read %i mb of memory with %i random on cursor initialized with %s accesses in %fs (%f mb/s)\n" - % (mtype, memory_read / mb, max_random_accesses, type(item), elapsed, (memory_read / mb) / elapsed), - file=sys.stderr) - - # an offset as large as the size doesn't work ! 
- assert not c.use_region(fc.size, size).is_valid() - - # collection - it should be able to collect all - assert man.num_file_handles() - assert man.collect() - assert man.num_file_handles() == 0 - # END for each item - # END for each manager type - finally: - os.close(fd) diff --git a/smmap/test/test_tutorial.py b/smmap/test/test_tutorial.py deleted file mode 100644 index 31c272a..0000000 --- a/smmap/test/test_tutorial.py +++ /dev/null @@ -1,75 +0,0 @@ -from .lib import TestBase - - -class TestTutorial(TestBase): - - def test_example(self): - # Memory Managers - ################## - import smmap - # This instance should be globally available in your application - # It is configured to be well suitable for 32-bit or 64 bit applications. - mman = smmap.SlidingWindowMapManager() - - # the manager provides much useful information about its current state - # like the amount of open file handles or the amount of mapped memory - assert mman.num_file_handles() == 0 - assert mman.mapped_memory_size() == 0 - # and many more ... - - # Cursors - ########## - import smmap.test.lib - with smmap.test.lib.FileCreator(1024 * 1024 * 8, "test_file") as fc: - # obtain a cursor to access some file. - c = mman.make_cursor(fc.path) - - # the cursor is now associated with the file, but not yet usable - assert c.is_associated() - assert not c.is_valid() - - # before you can use the cursor, you have to specify a window you want to - # access. The following just says you want as much data as possible starting - # from offset 0. 
- # To be sure your region could be mapped, query for validity - assert c.use_region().is_valid() # use_region returns self - - # once a region was mapped, you must query its dimension regularly - # to assure you don't try to access its buffer out of its bounds - assert c.size() - c.buffer()[0] # first byte - c.buffer()[1:10] # first 9 bytes - c.buffer()[c.size() - 1] # last byte - - # you can query absolute offsets, and check whether an offset is included - # in the cursor's data. - assert c.ofs_begin() < c.ofs_end() - assert c.includes_ofs(100) - - # If you are over out of bounds with one of your region requests, the - # cursor will be come invalid. It cannot be used in that state - assert not c.use_region(fc.size, 100).is_valid() - # map as much as possible after skipping the first 100 bytes - assert c.use_region(100).is_valid() - - # You can explicitly free cursor resources by unusing the cursor's region - c.unuse_region() - assert not c.is_valid() - - # Buffers - ######### - # Create a default buffer which can operate on the whole file - buf = smmap.SlidingWindowMapBuffer(mman.make_cursor(fc.path)) - - # you can use it right away - assert buf.cursor().is_valid() - - buf[0] # access the first byte - buf[-1] # access the last ten bytes on the file - buf[-10:] # access the last ten bytes - - # If you want to keep the instance between different accesses, use the - # dedicated methods - buf.end_access() - assert not buf.cursor().is_valid() # you cannot use the buffer anymore - assert buf.begin_access(offset=10) # start using the buffer at an offset diff --git a/smmap/test/test_util.py b/smmap/test/test_util.py deleted file mode 100644 index e6ac10f..0000000 --- a/smmap/test/test_util.py +++ /dev/null @@ -1,105 +0,0 @@ -from .lib import TestBase, FileCreator - -from smmap.util import ( - MapWindow, - MapRegion, - MapRegionList, - ALLOCATIONGRANULARITY, - is_64_bit, - align_to_mmap -) - -import os -import sys - - -class TestMMan(TestBase): - - def test_window(self): 
- wl = MapWindow(0, 1) # left - wc = MapWindow(1, 1) # center - wc2 = MapWindow(10, 5) # another center - wr = MapWindow(8000, 50) # right - - assert wl.ofs_end() == 1 - assert wc.ofs_end() == 2 - assert wr.ofs_end() == 8050 - - # extension does nothing if already in place - maxsize = 100 - wc.extend_left_to(wl, maxsize) - assert wc.ofs == 1 and wc.size == 1 - wl.extend_right_to(wc, maxsize) - wl.extend_right_to(wc, maxsize) - assert wl.ofs == 0 and wl.size == 1 - - # an actual left extension - pofs_end = wc2.ofs_end() - wc2.extend_left_to(wc, maxsize) - assert wc2.ofs == wc.ofs_end() and pofs_end == wc2.ofs_end() - - # respects maxsize - wc.extend_right_to(wr, maxsize) - assert wc.ofs == 1 and wc.size == maxsize - wc.extend_right_to(wr, maxsize) - assert wc.ofs == 1 and wc.size == maxsize - - # without maxsize - wc.extend_right_to(wr, sys.maxsize) - assert wc.ofs_end() == wr.ofs and wc.ofs == 1 - - # extend left - wr.extend_left_to(wc2, maxsize) - wr.extend_left_to(wc2, maxsize) - assert wr.size == maxsize - - wr.extend_left_to(wc2, sys.maxsize) - assert wr.ofs == wc2.ofs_end() - - wc.align() - assert wc.ofs == 0 and wc.size == align_to_mmap(wc.size, True) - - def test_region(self): - with FileCreator(self.k_window_test_size, "window_test") as fc: - half_size = fc.size // 2 - rofs = align_to_mmap(4200, False) - rfull = MapRegion(fc.path, 0, fc.size) - rhalfofs = MapRegion(fc.path, rofs, fc.size) - rhalfsize = MapRegion(fc.path, 0, half_size) - - # offsets - assert rfull.ofs_begin() == 0 and rfull.size() == fc.size - assert rfull.ofs_end() == fc.size # if this method works, it works always - - assert rhalfofs.ofs_begin() == rofs and rhalfofs.size() == fc.size - rofs - assert rhalfsize.ofs_begin() == 0 and rhalfsize.size() == half_size - - assert rfull.includes_ofs(0) and rfull.includes_ofs(fc.size - 1) and rfull.includes_ofs(half_size) - assert not rfull.includes_ofs(-1) and not rfull.includes_ofs(sys.maxsize) - - # auto-refcount - assert rfull.client_count() == 1 
- rfull2 = rfull - assert rfull.client_count() == 1, "no auto-counting" - - # window constructor - w = MapWindow.from_region(rfull) - assert w.ofs == rfull.ofs_begin() and w.ofs_end() == rfull.ofs_end() - - def test_region_list(self): - with FileCreator(100, "sample_file") as fc: - fd = os.open(fc.path, os.O_RDONLY) - try: - for item in (fc.path, fd): - ml = MapRegionList(item) - - assert len(ml) == 0 - assert ml.path_or_fd() == item - assert ml.file_size() == fc.size - finally: - os.close(fd) - - def test_util(self): - assert isinstance(is_64_bit(), bool) # just call it - assert align_to_mmap(1, False) == 0 - assert align_to_mmap(1, True) == ALLOCATIONGRANULARITY diff --git a/smmap/util.py b/smmap/util.py deleted file mode 100644 index fbb3872..0000000 --- a/smmap/util.py +++ /dev/null @@ -1,222 +0,0 @@ -"""Module containing a memory memory manager which provides a sliding window on a number of memory mapped files""" -import os -import sys - -from mmap import mmap, ACCESS_READ -from mmap import ALLOCATIONGRANULARITY - -__all__ = ["align_to_mmap", "is_64_bit", - "MapWindow", "MapRegion", "MapRegionList", "ALLOCATIONGRANULARITY"] - -#{ Utilities - - -def align_to_mmap(num, round_up): - """ - Align the given integer number to the closest page offset, which usually is 4096 bytes. - - :param round_up: if True, the next higher multiple of page size is used, otherwise - the lower page_size will be used (i.e. if True, 1 becomes 4096, otherwise it becomes 0) - :return: num rounded to closest page""" - res = (num // ALLOCATIONGRANULARITY) * ALLOCATIONGRANULARITY - if round_up and (res != num): - res += ALLOCATIONGRANULARITY - # END handle size - return res - - -def is_64_bit(): - """:return: True if the system is 64 bit. 
Otherwise it can be assumed to be 32 bit""" - return sys.maxsize > (1 << 32) - 1 - -#}END utilities - - -#{ Utility Classes - -class MapWindow: - - """Utility type which is used to snap windows towards each other, and to adjust their size""" - __slots__ = ( - 'ofs', # offset into the file in bytes - 'size' # size of the window in bytes - ) - - def __init__(self, offset, size): - self.ofs = offset - self.size = size - - def __repr__(self): - return "MapWindow(%i, %i)" % (self.ofs, self.size) - - @classmethod - def from_region(cls, region): - """:return: new window from a region""" - return cls(region._b, region.size()) - - def ofs_end(self): - return self.ofs + self.size - - def align(self): - """Assures the previous window area is contained in the new one""" - nofs = align_to_mmap(self.ofs, 0) - self.size += self.ofs - nofs # keep size constant - self.ofs = nofs - self.size = align_to_mmap(self.size, 1) - - def extend_left_to(self, window, max_size): - """Adjust the offset to start where the given window on our left ends if possible, - but don't make yourself larger than max_size. 
- The resize will assure that the new window still contains the old window area""" - rofs = self.ofs - window.ofs_end() - nsize = rofs + self.size - rofs -= nsize - min(nsize, max_size) - self.ofs -= rofs - self.size += rofs - - def extend_right_to(self, window, max_size): - """Adjust the size to make our window end where the right window begins, but don't - get larger than max_size""" - self.size = min(self.size + (window.ofs - self.ofs_end()), max_size) - - -class MapRegion: - - """Defines a mapped region of memory, aligned to pagesizes - - **Note:** deallocates used region automatically on destruction""" - __slots__ = [ - '_b', # beginning of mapping - '_mf', # mapped memory chunk (as returned by mmap) - '_uc', # total amount of usages - '_size', # cached size of our memory map - '__weakref__' - ] - - #{ Configuration - #} END configuration - - def __init__(self, path_or_fd, ofs, size, flags=0): - """Initialize a region, allocate the memory map - :param path_or_fd: path to the file to map, or the opened file descriptor - :param ofs: **aligned** offset into the file to be mapped - :param size: if size is larger then the file on disk, the whole file will be - allocated the the size automatically adjusted - :param flags: additional flags to be given when opening the file. - :raise Exception: if no memory can be allocated""" - self._b = ofs - self._size = 0 - self._uc = 0 - - if isinstance(path_or_fd, int): - fd = path_or_fd - else: - fd = os.open(path_or_fd, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags) - # END handle fd - - try: - kwargs = dict(access=ACCESS_READ, offset=ofs) - corrected_size = size - sizeofs = ofs - - # have to correct size, otherwise (instead of the c version) it will - # bark that the size is too large ... many extra file accesses because - # if this ... argh ! 
- actual_size = min(os.fstat(fd).st_size - sizeofs, corrected_size) - self._mf = mmap(fd, actual_size, **kwargs) - # END handle memory mode - - self._size = len(self._mf) - finally: - if isinstance(path_or_fd, str): - os.close(fd) - # END only close it if we opened it - # END close file handle - # We assume the first one to use us keeps us around - self.increment_client_count() - - def __repr__(self): - return "MapRegion<%i, %i>" % (self._b, self.size()) - - #{ Interface - - def buffer(self): - """:return: a buffer containing the memory""" - return self._mf - - def map(self): - """:return: a memory map containing the memory""" - return self._mf - - def ofs_begin(self): - """:return: absolute byte offset to the first byte of the mapping""" - return self._b - - def size(self): - """:return: total size of the mapped region in bytes""" - return self._size - - def ofs_end(self): - """:return: Absolute offset to one byte beyond the mapping into the file""" - return self._b + self._size - - def includes_ofs(self, ofs): - """:return: True if the given offset can be read in our mapped region""" - return self._b <= ofs < self._b + self._size - - def client_count(self): - """:return: number of clients currently using this region""" - return self._uc - - def increment_client_count(self, ofs = 1): - """Adjust the usage count by the given positive or negative offset. - If usage count equals 0, we will auto-release our resources - :return: True if we released resources, False otherwise. In the latter case, we can still be used""" - self._uc += ofs - assert self._uc > -1, "Increments must match decrements, usage counter negative: %i" % self._uc - - if self.client_count() == 0: - self.release() - return True - else: - return False - # end handle release - - def release(self): - """Release all resources this instance might hold. 
Must only be called if there usage_count() is zero""" - self._mf.close() - - #} END interface - - -class MapRegionList(list): - - """List of MapRegion instances associating a path with a list of regions.""" - __slots__ = ( - '_path_or_fd', # path or file descriptor which is mapped by all our regions - '_file_size' # total size of the file we map - ) - - def __new__(cls, path): - return super().__new__(cls) - - def __init__(self, path_or_fd): - self._path_or_fd = path_or_fd - self._file_size = None - - def path_or_fd(self): - """:return: path or file descriptor we are attached to""" - return self._path_or_fd - - def file_size(self): - """:return: size of file we manager""" - if self._file_size is None: - if isinstance(self._path_or_fd, str): - self._file_size = os.stat(self._path_or_fd).st_size - else: - self._file_size = os.fstat(self._path_or_fd).st_size - # END handle path type - # END update file size - return self._file_size - -#} END utility classes diff --git a/tox.ini b/tox.ini deleted file mode 100644 index f7205c5..0000000 --- a/tox.ini +++ /dev/null @@ -1,17 +0,0 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - -[tox] -envlist = flake8, py{37, 38, 39, 310, 311, 312} - -[testenv] -commands = {envpython} -m pytest --cov smmap --cov-report xml {posargs} -deps = - pytest - pytest-cov - -[testenv:flake8] -commands = flake8 {posargs} -deps = flake8