7 files changed: +448 additions, -154 deletions
Original file line number Diff line number Diff line change @@ -9,4 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
10
10
### Added
11
11
12
- - Added first version of the changelog
12
+ - Added first version of the changelog
13
+
14
+ ### Fixed
15
+
16
+ - Performance bug in stop sequence check slowing down streaming.
Original file line number Diff line number Diff line change
1
+ update :
2
+ poetry install
3
+ git submodule update --init --recursive
4
+
5
+ update.vendor :
6
+ cd vendor/llama.cpp && git pull origin master
7
+
8
+ build :
9
+ python3 setup.py develop
10
+
11
+ build.cuda :
12
+ CMAKE_ARGS=" -DLLAMA_CUBLAS=on" FORCE_CMAKE=1 python3 setup.py develop
13
+
14
+ build.opencl :
15
+ CMAKE_ARGS=" -DLLAMA_CLBLAST=on" FORCE_CMAKE=1 python3 setup.py develop
16
+
17
+ build.openblas :
18
+ CMAKE_ARGS=" -DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py develop
19
+
20
+ build.blis :
21
+ CMAKE_ARGS=" -DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" FORCE_CMAKE=1 python3 setup.py develop
22
+
23
+ build.sdist :
24
+ python3 setup.py sdist
25
+
26
+ deploy.pypi :
27
+ python3 -m twine upload dist/*
28
+
29
+ deploy.gh-docs :
30
+ mkdocs build
31
+ mkdocs gh-deploy
32
+
33
+ clean :
34
+ - cd vendor/llama.cpp && make clean
35
+ - cd vendor/llama.cpp && rm libllama.so
36
+ - rm -rf _skbuild
37
+ - rm llama_cpp/libllama.so
38
+
39
+ # Declare every command-style target phony so make runs it even when a file
40
+ # or directory with the same name (e.g. `build`) exists. Keep this list in
41
+ # sync with the targets defined in this Makefile.
42
+ .PHONY : \
43
+ update \
44
+ update.vendor \
45
+ build \
46
+ build.cuda \
47
+ build.opencl \
48
+ build.openblas \
49
+ build.blis \
50
+ build.sdist \
51
+ deploy.pypi \
52
+ deploy.gh-docs \
53
+ clean
Original file line number Diff line number Diff line change @@ -155,6 +155,17 @@ To get started, clone the repository and install the package in development mode
155
155
156
156
``` bash
157
157
git clone --recurse-submodules git@github.com:abetlen/llama-cpp-python.git
158
+
159
+ # Install with pip
160
+ pip install -e .
161
+
162
+ # If you want to use the FastAPI / OpenAPI server (the extra is quoted so shells such as zsh do not glob the brackets)
163
+ pip install -e '.[server]'
164
+
165
+ # If you're a Poetry user, installing will also create an in-project virtual environment (.venv)
166
+ poetry install --all-extras
167
+ . .venv/bin/activate
168
+
158
169
# Will need to be re-run any time vendor/llama.cpp is updated
159
170
python3 setup.py develop
160
171
```
Original file line number Diff line number Diff line change @@ -795,20 +795,22 @@ def _create_completion(
795
795
break
796
796
797
797
if stream :
798
+ remaining_tokens = completion_tokens [returned_tokens :]
799
+ remaining_text = self .detokenize (remaining_tokens )
800
+ remaining_length = len (remaining_text )
801
+
798
802
# We want to avoid yielding any characters from
799
803
# the generated text if they are part of a stop
800
804
# sequence.
801
805
first_stop_position = 0
802
806
for s in stop_sequences :
803
- for i in range (len (s ), 0 , - 1 ):
804
- if all_text .endswith (s [:i ]):
807
+ for i in range (min ( len (s ), remaining_length ), 0 , - 1 ):
808
+ if remaining_text .endswith (s [:i ]):
805
809
if i > first_stop_position :
806
810
first_stop_position = i
807
811
break
808
812
809
813
token_end_position = 0
810
- remaining_tokens = completion_tokens [returned_tokens :]
811
- remaining_length = len (self .detokenize (remaining_tokens ))
812
814
for token in remaining_tokens :
813
815
token_end_position += len (self .detokenize ([token ]))
814
816
# Check if stop sequence is in the token
Original file line number Diff line number Diff line change
1
+ [virtualenvs]
2
+ in-project = true
3
+ prefer-active-python = true
Original file line number Diff line number Diff line change @@ -15,7 +15,9 @@ include = [
15
15
[tool .poetry .dependencies ]
16
16
python = " ^3.8.1"
17
17
typing-extensions = " ^4.5.0"
18
-
18
+ uvicorn = { version = "^0.21.1", optional = true }
19
+ fastapi = { version = "^0.95.0", optional = true }
20
+ sse-starlette = { version = "^1.3.3", optional = true }
19
21
20
22
[tool .poetry .group .dev .dependencies ]
21
23
black = " ^23.3.0"
@@ -25,6 +27,10 @@ mkdocstrings = {extras = ["python"], version = "^0.21.2"}
25
27
mkdocs-material = " ^9.1.14"
26
28
pytest = " ^7.3.1"
27
29
httpx = " ^0.24.1"
30
+ scikit-build = "0.13"
31
+
32
+ [tool.poetry.extras]
33
+ server = ["uvicorn", "fastapi", "sse-starlette"]
28
34
29
35
[build-system ]
30
36
requires = [
You can’t perform that action at this time.
0 commit comments