Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e22862d

Browse files
committed
2 parents c74ca30 + 1db3b58 commit e22862d
Copy full SHA for e22862d
Expand file treeCollapse file tree

18 files changed

+625
-93
lines changed

‎.github/workflows/build-and-release.yaml

Copy file name to clipboardExpand all lines: .github/workflows/build-and-release.yaml
+5-5Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ${{ matrix.os }}
1212
strategy:
1313
matrix:
14-
os: [ubuntu-latest, windows-latest, macOS-latest]
14+
os: [ubuntu-20.04, windows-2019, macos-11]
1515

1616
steps:
1717
- uses: actions/checkout@v3
@@ -23,19 +23,19 @@ jobs:
2323
with:
2424
python-version: "3.8"
2525

26-
- name: Install cibuildwheel
27-
run: python -m pip install cibuildwheel==2.12.1
28-
2926
- name: Install dependencies
3027
run: |
3128
python -m pip install --upgrade pip
3229
python -m pip install -e .[all]
3330
3431
- name: Build wheels
35-
run: python -m cibuildwheel --output-dir wheelhouse
32+
uses: pypa/cibuildwheel@v2.16.5
3633
env:
3734
# disable repair
3835
CIBW_REPAIR_WHEEL_COMMAND: ""
36+
with:
37+
package-dir: .
38+
output-dir: wheelhouse
3939

4040
- uses: actions/upload-artifact@v3
4141
with:
+131Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
name: Build Wheels (CUDA)
2+
3+
on: workflow_dispatch
4+
5+
permissions:
6+
contents: write
7+
8+
jobs:
9+
define_matrix:
10+
name: Define Build Matrix
11+
runs-on: ubuntu-latest
12+
outputs:
13+
matrix: ${{ steps.set-matrix.outputs.matrix }}
14+
defaults:
15+
run:
16+
shell: pwsh
17+
18+
steps:
19+
- name: Define Job Output
20+
id: set-matrix
21+
run: |
22+
$matrix = @{
23+
'os' = @('ubuntu-20.04', 'windows-latest')
24+
'pyver' = @("3.10", "3.11", "3.12")
25+
'cuda' = @("12.1.1", "12.2.2", "12.3.2")
26+
'releasetag' = @("basic")
27+
}
28+
29+
$matrixOut = ConvertTo-Json $matrix -Compress
30+
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
31+
32+
build_wheels:
33+
name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
34+
needs: define_matrix
35+
runs-on: ${{ matrix.os }}
36+
strategy:
37+
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
38+
defaults:
39+
run:
40+
shell: pwsh
41+
env:
42+
CUDAVER: ${{ matrix.cuda }}
43+
AVXVER: ${{ matrix.releasetag }}
44+
45+
steps:
46+
- uses: actions/checkout@v4
47+
with:
48+
submodules: "recursive"
49+
50+
- uses: actions/setup-python@v4
51+
with:
52+
python-version: ${{ matrix.pyver }}
53+
54+
- name: Setup Mamba
55+
uses: conda-incubator/setup-miniconda@v2.2.0
56+
with:
57+
activate-environment: "build"
58+
python-version: ${{ matrix.pyver }}
59+
miniforge-variant: Mambaforge
60+
miniforge-version: latest
61+
use-mamba: true
62+
add-pip-as-python-dependency: true
63+
auto-activate-base: false
64+
65+
- name: VS Integration Cache
66+
id: vs-integration-cache
67+
if: runner.os == 'Windows'
68+
uses: actions/cache@v3.3.2
69+
with:
70+
path: ./MSBuildExtensions
71+
key: cuda-${{ matrix.cuda }}-vs-integration
72+
73+
- name: Get Visual Studio Integration
74+
if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
75+
run: |
76+
if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
77+
$links = (Invoke-RestMethod 'https://github.com/Jimver/cuda-toolkit/raw/dc0ca7bb29c5a92f7a963d3d5c93f8d59765136a/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
78+
for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
79+
Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
80+
& 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
81+
Remove-Item 'cudainstaller.zip'
82+
83+
- name: Install Visual Studio Integration
84+
if: runner.os == 'Windows'
85+
run: |
86+
$y = (gi '.\MSBuildExtensions').fullname + '\*'
87+
(gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
88+
$cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
89+
echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
90+
91+
- name: Install Dependencies
92+
env:
93+
MAMBA_DOWNLOAD_FAILFAST: "0"
94+
MAMBA_NO_LOW_SPEED_LIMIT: "1"
95+
run: |
96+
$cudaVersion = $env:CUDAVER
97+
mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion
98+
python -m pip install build wheel
99+
100+
- name: Build Wheel
101+
run: |
102+
$cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
103+
$env:CUDA_PATH = $env:CONDA_PREFIX
104+
$env:CUDA_HOME = $env:CONDA_PREFIX
105+
$env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
106+
if ($IsLinux) {
107+
$env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
108+
}
109+
$env:VERBOSE = '1'
110+
$env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
111+
$env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
112+
if ($env:AVXVER -eq 'AVX') {
113+
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
114+
}
115+
if ($env:AVXVER -eq 'AVX512') {
116+
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
117+
}
118+
if ($env:AVXVER -eq 'basic') {
119+
$env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
120+
}
121+
python -m build --wheel
122+
# export the CUDA version tag (major+minor without dots, e.g. 121) via GITHUB_ENV so the release step can read env.CUDA_VERSION
123+
Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV
124+
125+
- uses: softprops/action-gh-release@v1
126+
with:
127+
files: dist/*
128+
# Set tag_name to <tag>-cu<cuda_version>
129+
tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
130+
env:
131+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+87Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
name: Build Wheels (Metal)
2+
3+
on: workflow_dispatch
4+
5+
permissions:
6+
contents: write
7+
8+
jobs:
9+
define_matrix:
10+
name: Define Build Matrix
11+
runs-on: ubuntu-latest
12+
outputs:
13+
matrix: ${{ steps.set-matrix.outputs.matrix }}
14+
defaults:
15+
run:
16+
shell: pwsh
17+
18+
steps:
19+
- name: Define Job Output
20+
id: set-matrix
21+
run: |
22+
$matrix = @{
23+
'os' = @('macos-11', 'macos-12', 'macos-13')
24+
'pyver' = @('3.10', '3.11', '3.12')
25+
}
26+
27+
$matrixOut = ConvertTo-Json $matrix -Compress
28+
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
29+
30+
build_wheels:
31+
name: ${{ matrix.os }} Python ${{ matrix.pyver }}
32+
needs: define_matrix
33+
runs-on: ${{ matrix.os }}
34+
strategy:
35+
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
36+
env:
37+
OSVER: ${{ matrix.os }}
38+
39+
steps:
40+
- uses: actions/checkout@v4
41+
with:
42+
submodules: "recursive"
43+
44+
- uses: actions/setup-python@v4
45+
with:
46+
python-version: ${{ matrix.pyver }}
47+
48+
- name: Install Dependencies
49+
run: |
50+
python -m pip install build wheel cmake
51+
52+
- name: Build Wheel
53+
run: |
54+
XCODE15PATH="/Applications/Xcode_15.0.app/Contents/Developer"
55+
XCODE15BINPATH="${XCODE15PATH}/Toolchains/XcodeDefault.xctoolchain/usr/bin"
56+
export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_METAL=on"
57+
[[ "$OSVER" == "macos-13" ]] && export CC="${XCODE15BINPATH}/cc" && export CXX="${XCODE15BINPATH}/c++" && export MACOSX_DEPLOYMENT_TARGET="13.0"
58+
[[ "$OSVER" == "macos-12" ]] && export MACOSX_DEPLOYMENT_TARGET="12.0"
59+
[[ "$OSVER" == "macos-11" ]] && export MACOSX_DEPLOYMENT_TARGET="11.0"
60+
61+
export CMAKE_OSX_ARCHITECTURES="arm64" && export ARCHFLAGS="-arch arm64"
62+
VERBOSE=1 python -m build --wheel
63+
64+
if [[ "$OSVER" == "macos-13" ]]; then
65+
export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"
66+
export MACOSX_DEPLOYMENT_TARGET="14.0"
67+
VERBOSE=1 python -m build --wheel
68+
fi
69+
70+
for file in ./dist/*.whl; do cp "$file" "${file/arm64.whl/aarch64.whl}"; done
71+
72+
export CMAKE_OSX_ARCHITECTURES="x86_64" && export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_METAL=on" && export ARCHFLAGS="-arch x86_64"
73+
VERBOSE=1 python -m build --wheel
74+
75+
if [[ "$OSVER" == "macos-13" ]]; then
76+
export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"
77+
export MACOSX_DEPLOYMENT_TARGET="14.0"
78+
VERBOSE=1 python -m build --wheel
79+
fi
80+
81+
- uses: softprops/action-gh-release@v1
82+
with:
83+
files: dist/*
84+
# Set tag_name to <tag>-metal
85+
tag_name: ${{ github.ref_name }}-metal
86+
env:
87+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+48Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: Wheels Index
2+
3+
on:
4+
# Trigger on any new release
5+
release:
6+
types: [published]
7+
8+
# Allows you to run this workflow manually from the Actions tab
9+
workflow_dispatch:
10+
11+
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
12+
permissions:
13+
contents: read
14+
pages: write
15+
id-token: write
16+
17+
# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
18+
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
19+
concurrency:
20+
group: "pages"
21+
cancel-in-progress: false
22+
23+
jobs:
24+
# Single deploy job since we're just deploying
25+
deploy:
26+
environment:
27+
name: github-pages
28+
url: ${{ steps.deployment.outputs.page_url }}
29+
runs-on: ubuntu-latest
30+
steps:
31+
- name: Checkout
32+
uses: actions/checkout@v4
33+
- name: Setup Pages
34+
uses: actions/configure-pages@v4
35+
- name: Build
36+
run: |
37+
./scripts/releases-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
38+
./scripts/releases-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
39+
./scripts/releases-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
40+
./scripts/releases-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
41+
- name: Upload artifact
42+
uses: actions/upload-pages-artifact@v3
43+
with:
44+
# Upload only the generated 'index' directory
45+
path: 'index'
46+
- name: Deploy to GitHub Pages
47+
id: deployment
48+
uses: actions/deploy-pages@v4

‎CHANGELOG.md

Copy file name to clipboardExpand all lines: CHANGELOG.md
+17-1Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.2.59]
11+
12+
- feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c
13+
- feat: Binary wheels for CPU, CUDA (12.1 - 12.3), Metal by @abetlen, @jllllll, and @oobabooga in #1247
14+
- fix: segfault when logits_all=False by @abetlen in 8649d7671bd1a7c0d9cc6a5ad91c6ca286512ab3
15+
- fix: last tokens passing to sample_repetition_penalties function by @ymikhailov in #1295
16+
17+
## [0.2.58]
18+
19+
- feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c
20+
- feat: add support for KV cache quantization options by @Limour-dev in #1307
21+
- feat: Add logprobs support to chat completions by @windspirit95 in #1311
22+
- fix: set LLAMA_METAL_EMBED_LIBRARY=on on MacOS arm64 by @bretello in #1289
23+
- feat: Add tools/functions variables to Jinja2ChatFormatter, add function response formatting for all simple chat formats by @CISC in #1273
24+
- fix: Changed local API doc references to hosted by @lawfordp2017 in #1317
25+
1026
## [0.2.57]
1127

1228
- feat: Update llama.cpp to ggerganov/llama.cpp@ac9ee6a4ad740bc1ee484ede43e9f92b5af244c1
@@ -24,7 +40,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2440

2541
## [0.2.55]
2642

27-
- feat: Update llama.cpp to ggerganov/9731134296af3a6839cd682e51d9c2109a871de5
43+
- feat: Update llama.cpp to ggerganov/llama.cpp@9731134296af3a6839cd682e51d9c2109a871de5
2844
- docs: fix small typo in README: 'model know how' -> 'model knows how' by @boegel in #1244
2945

3046
## [0.2.54]

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.