diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml deleted file mode 100644 index 0ac03351..00000000 --- a/.github/workflows/codeql.yml +++ /dev/null @@ -1,83 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: [ "main" ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ "main" ] - schedule: - - cron: '24 0 * * 2' - -jobs: - analyze: - name: Analyze - # Runner size impacts CodeQL analysis time. To learn more, please see: - # - https://gh.io/recommended-hardware-resources-for-running-codeql - # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners - # Consider using larger runners for possible analysis time improvements. - runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} - timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] - # Use only 'java' to analyze code written in Java, Kotlin or both - # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - - # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v2 - - # â„šī¸ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 - with: - category: "/language:${{matrix.language}}" - diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 00000000..594f161c --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,71 @@ +name: Publish Python 🐍 distribution đŸ“Ļ to PyPI and TestPyPI + +on: push + +jobs: + build: + name: Build distribution đŸ“Ļ + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v4 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python 🐍 distribution đŸ“Ļ to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/llm-workflow-engine + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution đŸ“Ļ to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + + publish-to-testpypi: + name: Publish Python 🐍 distribution đŸ“Ļ to TestPyPI + needs: + - build + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/llm-workflow-engine + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution đŸ“Ļ to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 2171af55..205a304d 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v3 diff --git a/COMMIT_LOG.md b/COMMIT_LOG.md index 7da6096c..5f4d28bf 100644 --- a/COMMIT_LOG.md +++ b/COMMIT_LOG.md @@ -1,3 +1,21 @@ +### v0.22.11 - 04/16/2025 + +* **Wed Apr 16 2025:** o3/o4-mini +* **Mon Apr 14 2025:** gpt 4.1 mini/nano + +### v0.22.10 - 04/14/2025 + +* **Mon Apr 14 2025:** gpt-4.1 + +### v0.22.9 - 04/08/2025 + +* **Mon Apr 07 2025:** migrate from textract to kreuzberg *(NOTE: with kreuzberg, pandoc is a required dependency for using workflows with non-PDF documents)* +* **Mon Apr 07 2025:** switch to newer build system + +### v0.22.8 - 04/07/2025 + +* **Mon Apr 07 2025:** workflow for auto-publish to PyPi + ### v0.22.7 - 03/19/2025 * **Wed Mar 19 2025:** support AIMessage objects in streaming diff --git a/README.md b/README.md index 61860c4b..a226fce6 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![Test status](https://github.com/llm-workflow-engine/llm-workflow-engine/actions/workflows/python-app.yml/badge.svg)](https://github.com/llm-workflow-engine/llm-workflow-engine/actions/workflows/python-app.yml) -[![CodeQL status](https://github.com/llm-workflow-engine/llm-workflow-engine/actions/workflows/codeql.yml/badge.svg)](https://github.com/llm-workflow-engine/llm-workflow-engine/actions/workflows/codeql.yml) +[![CodeQL](https://github.com/llm-workflow-engine/llm-workflow-engine/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/llm-workflow-engine/llm-workflow-engine/actions/workflows/github-code-scanning/codeql)

lwe-logo-small diff --git a/docs/installation.rst b/docs/installation.rst index 673ac593..218de6e1 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -16,15 +16,22 @@ Requirements To use this project, you need: -* Python 3.9 or later -* ``setuptools`` installed. You can install it using ``pip install setuptools``. Make sure that you have the last version of pip: ``pip install --upgrade pip`` +* Python 3.9 or later with pip installed. * A database backend (`SQLite `_ by default, any configurable in `SQLAlchemy `_ allowed). +* Optional: + * `Pandoc `_ for document extraction *other than* PDFs. ----------------------------------------------- From packages ----------------------------------------------- -Install the latest version of this software directly from github with pip: +Install the latest release of this software via pip: + +.. code-block:: bash + + pip install llm-workflow-engine + +Install the 'bleeding edge' of this software directly from github with pip: .. code-block:: bash diff --git a/docs/plugins.rst b/docs/plugins.rst index 83ef91ce..02188fe2 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -122,8 +122,8 @@ Supported providers **NOTE:** While these provider integrations are working, none have been well-tested yet. -* **provider_ai21:** Access to `AI21 `_ models - https://github.com/llm-workflow-engine/lwe-plugin-provider-ai21 +* **provider_ai21:** Access to `Chat AI21 `_ models + https://github.com/llm-workflow-engine/lwe-plugin-provider-chat-ai21 * **provider_azure_openai_chat:** Access to `Azure OpenAI `_ chat models https://github.com/llm-workflow-engine/lwe-plugin-provider-azure-openai-chat * **provider_chat_anthropic:** Access to `Anthropic `_ chat models @@ -146,12 +146,16 @@ Supported providers https://github.com/llm-workflow-engine/lwe-plugin-provider-chat-together * **provider_chat_vertexai:** Access to `Google Vertex AI `_ chat models. https://github.com/llm-workflow-engine/lwe-plugin-provider-chat-vertexai +* **provider_chat_xai:** Access to `xAI `_ chat models. + https://github.com/llm-workflow-engine/lwe-plugin-provider-chat-xai * **provider_huggingface_hub:** Access to `Hugging Face Hub `_ models https://github.com/llm-workflow-engine/lwe-plugin-provider-huggingface-hub * **provider_openai:** Access to non-chat `OpenAI `_ models (GPT-3, etc.) https://github.com/llm-workflow-engine/lwe-plugin-provider-openai * **provider_openrouter:** Access to `OpenRouter `_ models https://github.com/llm-workflow-engine/lwe-plugin-provider-openrouter +* **provider_requesty:** Access to `Requesty `_ models + https://github.com/llm-workflow-engine/lwe-plugin-provider-requesty * **provider_vertexai:** Access to `Google Vertex AI `_ text/code models. https://github.com/llm-workflow-engine/lwe-plugin-provider-vertexai diff --git a/docs/requirements.txt b/docs/requirements.txt index e221e961..7ba6cf97 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,31 +2,31 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile requirements.in +# pip-compile --output-file=- # alabaster==0.7.16 # via sphinx -babel==2.12.1 +babel==2.17.0 # via sphinx -certifi==2023.5.7 +certifi==2025.1.31 # via requests -charset-normalizer==3.2.0 +charset-normalizer==3.4.1 # via requests -docutils==0.20.1 +docutils==0.21.2 # via sphinx -idna==3.4 +idna==3.10 # via requests imagesize==1.4.1 # via sphinx -jinja2==3.1.2 +jinja2==3.1.6 # via sphinx -markupsafe==2.1.3 +markupsafe==3.0.2 # via jinja2 -packaging==23.1 +packaging==24.2 # via sphinx -pygments==2.15.1 +pygments==2.19.1 # via sphinx -requests==2.31.0 +requests==2.32.3 # via sphinx snowballstemmer==2.2.0 # via sphinx @@ -36,17 +36,17 @@ sphinx==7.3.7 # sphinx-copybutton sphinx-copybutton==0.5.2 # via -r requirements.in -sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.10 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -urllib3==2.0.3 +urllib3==2.4.0 # via requests diff --git a/docs/workflows.rst b/docs/workflows.rst index 1557c5d6..21a70ddd 100644 --- a/docs/workflows.rst +++ b/docs/workflows.rst @@ -66,7 +66,7 @@ Example: retries: 10 delay: 3 -``text_extractor``: Provides an easy way to extract text content from many different file types. For supported arguments and return values, see the `text_extractor module documentation `_. +``text_extractor``: Provides an easy way to extract text content from many different file types. For supported arguments and return values, see the `text_extractor module documentation `_. *NOTE: PDF document extraction is supported natively, to extract other document types (.docx, .xlsx, etc.) you need to install* `pandoc `_ *and make sure it's available in your PATH.* Example: diff --git a/lwe/backends/api/request.py b/lwe/backends/api/request.py index 7d3ea5d0..8c4abeaa 100644 --- a/lwe/backends/api/request.py +++ b/lwe/backends/api/request.py @@ -309,7 +309,7 @@ def strip_out_messages_over_max_tokens(self, messages, max_tokens): def is_openai_o_series(self): if self.provider.name == "provider_chat_openai": model_name = getattr(self.llm, self.provider.model_property_name) - if model_name.startswith("o1-") or model_name.startswith("o3-"): + if model_name.startswith("o1") or model_name.startswith("o3") or model_name.startswith("o4"): return True return False diff --git a/lwe/backends/api/workflow/library/text_extractor.py b/lwe/backends/api/workflow/library/text_extractor.py index e3208e10..647127e4 100644 --- a/lwe/backends/api/workflow/library/text_extractor.py +++ b/lwe/backends/api/workflow/library/text_extractor.py @@ -6,7 +6,7 @@ import tempfile from urllib.parse import urlparse -import textract +import kreuzberg import pymupdf4llm import fitz fitz.TOOLS.mupdf_display_errors(False) @@ -20,7 +20,7 @@ config.set("debug.log.enabled", True) log = Logger("text_extractor", config) -TEXTRACT_SUPPORTED_FILE_EXTENSIONS = [ +KREUZBERG_SUPPORTED_FILE_EXTENSIONS = [ # Microsoft Office formats ".docx", ".pptx", @@ -121,13 +121,13 @@ def extract_text_pymupdf(path): return pymupdf4llm.to_markdown(path) -def extract_text_textract(path): - return textract.process(path).decode("utf-8") +def extract_text_kreuzberg(path): + return kreuzberg.extract_file_sync(path) def get_file_extension(path, default_extension): file_extension = default_extension - for ext in set(TEXTRACT_SUPPORTED_FILE_EXTENSIONS + PYMUPDF_SUPPORTED_EXTENSIONS): + for ext in set(KREUZBERG_SUPPORTED_FILE_EXTENSIONS + PYMUPDF_SUPPORTED_EXTENSIONS): if path.lower().endswith(ext): file_extension = ext break @@ -184,12 +184,13 @@ def main(): message = f"Error extracting {path} content with pymupdf4llm: {str(e)}" log.error(message) module.fail_json(msg=message) - elif file_extension in TEXTRACT_SUPPORTED_FILE_EXTENSIONS: - log.debug(f"Extracting content from {path} with textract") + elif file_extension in KREUZBERG_SUPPORTED_FILE_EXTENSIONS: + log.debug(f"Extracting content from {path} with kreuzberg") try: - content = extract_text_textract(path) + extraction = extract_text_kreuzberg(path) + content = extraction.content except Exception as e: - message = f"Error extracting {file_extension} content with textract: {str(e)}" + message = f"Error extracting {file_extension} content with kreuzberg: {str(e)}" log.error(message) module.fail_json(msg=message) else: diff --git a/lwe/plugins/provider_chat_openai.py b/lwe/plugins/provider_chat_openai.py index f3ae1ac1..3fecd953 100644 --- a/lwe/plugins/provider_chat_openai.py +++ b/lwe/plugins/provider_chat_openai.py @@ -106,6 +106,24 @@ def static_models(self): "gpt-4o-mini-2024-07-18": { "max_tokens": 131072, }, + "gpt-4.1": { + "max_tokens": 1047576, + }, + "gpt-4.1-2025-04-14": { + "max_tokens": 1047576, + }, + "gpt-4.1-mini": { + "max_tokens": 1047576, + }, + "gpt-4.1-mini-2025-04-14": { + "max_tokens": 1047576, + }, + "gpt-4.1-nano": { + "max_tokens": 1047576, + }, + "gpt-4.1-nano-2025-04-14": { + "max_tokens": 1047576, + }, "gpt-4.5-preview": { "max_tokens": 131072, }, @@ -130,12 +148,31 @@ def static_models(self): "o1-2024-12-17": { "max_tokens": 204800, }, + # TODO: These are not chat models, how to support? + # "o1-pro": { + # "max_tokens": 204800, + # }, + # "o1-pro-2025-03-19": { + # "max_tokens": 204800, + # }, "o3-mini": { "max_tokens": 204800, }, "o3-mini-2025-01-31": { "max_tokens": 204800, }, + "o3": { + "max_tokens": 204800, + }, + "o3-2025-04-16": { + "max_tokens": 204800, + }, + "o4-mini": { + "max_tokens": 204800, + }, + "o4-mini-2025-04-16": { + "max_tokens": 204800, + }, } def prepare_messages_method(self): diff --git a/lwe/version.py b/lwe/version.py index 12f825ca..c98ace01 100644 --- a/lwe/version.py +++ b/lwe/version.py @@ -1 +1 @@ -__version__ = "0.22.7" +__version__ = "0.22.11" diff --git a/pyproject.toml b/pyproject.toml index aa4949aa..b601337e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,90 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "llm-workflow-engine" +dynamic = ["version"] +description = "CLI tool and workflow manager for common LLMs" +readme = "README.md" +authors = [ + {name = "Mahmoud Mabrouk", email = "mahmoudmabrouk.mail@gmail.com"}, + {name = "Chad Phillips"} +] +requires-python = ">=3.9" +license = "MIT" +classifiers = [ + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", +] +dependencies = [ + "ansible>=8.0", + "ansible-core>=2.15", + "alembic", + "beautifulsoup4", + "docutils>=0.20.1", + "email-validator", + "Jinja2", + "kreuzberg", + "langchain>=0.3.19,<0.4", + "langchain-core>=0.3.39,<0.4", + "langchain-community>=0.3.16,<0.4", + "langchain_openai>=0.3.3", + "names", + "numexpr>=2.8.4", + "openpyxl", + "pdfminer.six", + "prompt-toolkit", + "pymupdf4llm", + "pyperclip", + "python-frontmatter", + "PyYAML", + "rich", + "setuptools", + "sqlalchemy>=1.4.48", + "tiktoken", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-datadir", + "pip-tools", + "flake8", + "black", +] + +[project.urls] +Homepage = "https://github.com/llm-workflow-engine/llm-workflow-engine" +Repository = "https://github.com/llm-workflow-engine/llm-workflow-engine" + +[project.scripts] +lwe = "lwe.main:main" + +[project.entry-points."lwe_plugins"] + +[tool.setuptools] +packages = {find = {}} + +[tool.setuptools.package-data] +"lwe" = [ + "backends/api/schema/alembic.ini", + "backends/api/schema/alembic/*", + "backends/api/schema/alembic/**/*", + "examples/*", + "examples/**/*", + "tools/*", + "tools/**/*", + "presets/*", + "presets/**/*", + "templates/*", + "templates/**/*", + "workflows/*", + "workflows/**/*", +] + +[tool.setuptools.dynamic] +version = {attr = "lwe.version.__version__"} + [tool.black] line-length = 100 diff --git a/scripts/release-pypi.sh b/scripts/release-pypi.sh index 5bb04c09..c3950bf1 100755 --- a/scripts/release-pypi.sh +++ b/scripts/release-pypi.sh @@ -3,39 +3,54 @@ # Convenience script to handle preparing for PyPi release, and printing out the # commands to execute it. +clean() { + echo "Cleaning build environment" + rm -rfv dist/ build/ + find . -depth -name __pycache__ -type d -exec rm -rfv {} \; +} + execute() { - local update_build_release_packages="pip install --upgrade wheel build twine" - local clean="rm -rfv dist/ build/" + local update_build_release_packages="pip install --upgrade build twine validate-pyproject" + local validate="validate-pyproject pyproject.toml" local build="python -m build" + local check_dist="twine check dist/*" local test_pypi_upload="python -m twine upload --repository testpypi dist/*" local pypi_upload="python -m twine upload --skip-existing dist/*" + # Uncomment the following line if you want to sign releases + # local pypi_upload="python -m twine upload --skip-existing --sign dist/*" echo "Updating build and release packages with command:" echo " ${update_build_release_packages}" ${update_build_release_packages} if [ $? -eq 0 ]; then - echo "Cleaning build environment with command:" - echo " ${clean}" - ${clean} + echo "Validating pyproject.toml with command:" + echo " ${validate}" + ${validate} if [ $? -eq 0 ]; then echo "Building release with command:" echo " ${build}" ${build} if [ $? -eq 0 ]; then - echo "Build successful" - echo - echo "Test release with command:" - echo " ${test_pypi_upload}" - echo - echo "Release with command:" - echo " ${pypi_upload}" + echo "Checking built distribution with command:" + echo " ${check_dist}" + ${check_dist} + if [ $? -eq 0 ]; then + echo "Build successful and verified" + echo + echo "Test release with command:" + echo " ${test_pypi_upload}" + echo + echo "Release with command:" + echo " ${pypi_upload}" + fi fi fi fi } -if [ -d vit ] && [ -r setup.py ]; then +if [ -r pyproject.toml ]; then + clean execute else echo "ERROR: must run script from repository root" diff --git a/setup.py b/setup.py deleted file mode 100644 index 50e11356..00000000 --- a/setup.py +++ /dev/null @@ -1,57 +0,0 @@ -from setuptools import find_packages, setup -import re -from os import path - -FILE_DIR = path.dirname(path.abspath(path.realpath(__file__))) - -with open("README.md", "r", encoding="utf-8") as fh: - long_description = fh.read() - -with open("requirements.txt") as f: - install_requirement = f.readlines() - -with open(path.join(FILE_DIR, "lwe", "version.py")) as f: - version = re.match(r'^__version__ = "([\w\.]+)"$', f.read().strip())[1] - -setup( - name="llm-workflow-engine", - version=version, - author="Mahmoud Mabrouk, Chad Phillips", - author_email="mahmoudmabrouk.mail@gmail.com", - description="CLI tool and workflow manager for common LLMs, with a focus on OpenAI's models", - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/llm-workflow-engine/llm-workflow-engine", - packages=find_packages(), - package_data={ - "lwe": [ - "backends/api/schema/alembic.ini", - "backends/api/schema/alembic/*", - "backends/api/schema/alembic/**/*", - "examples/*", - "examples/**/*", - "tools/*", - "tools/**/*", - "presets/*", - "presets/**/*", - "templates/*", - "templates/**/*", - "workflows/*", - "workflows/**/*", - ], - }, - install_requires=install_requirement, - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - ], - python_requires=">=3.9", - entry_points={ - "console_scripts": [ - "lwe = lwe.main:main", - ], - "lwe_plugins": [], - }, - scripts=[], -)