Commit 66bcb8d

Merge branch 'main' into add-numpy-support

2 parents: 7fc7bc3 + 8f35bdd

7 files changed: +448 −154 lines

CHANGELOG.md (+5 −1)
```diff
@@ -9,4 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.
 
 ### Added
 
-- Added first version of the changelog
+- Added first version of the changelog
+
+### Fixed
+
+- Performance bug in stop sequence check slowing down streaming.
```
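The new `### Fixed` entry corresponds to the `llama_cpp/llama.py` change shown below, which rescopes the streaming stop-sequence check.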

Makefile (new file, +49)
```makefile
update:
	poetry install
	git submodule update --init --recursive

update.vendor:
	cd vendor/llama.cpp && git pull origin master

build:
	python3 setup.py develop

build.cuda:
	CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 python3 setup.py develop

build.opencl:
	CMAKE_ARGS="-DLLAMA_CLBLAST=on" FORCE_CMAKE=1 python3 setup.py develop

build.openblas:
	CMAKE_ARGS="-DLLAMA_OPENBLAS=on" FORCE_CMAKE=1 python3 setup.py develop

build.blis:
	CMAKE_ARGS="-DLLAMA_OPENBLAS=on -DLLAMA_OPENBLAS_VENDOR=blis" FORCE_CMAKE=1 python3 setup.py develop

build.sdist:
	python3 setup.py sdist

deploy.pypi:
	python3 -m twine upload dist/*

deploy.gh-docs:
	mkdocs build
	mkdocs gh-deploy

clean:
	- cd vendor/llama.cpp && make clean
	- cd vendor/llama.cpp && rm libllama.so
	- rm -rf _skbuild
	- rm llama_cpp/libllama.so

.PHONY: \
	update \
	update.vendor \
	build \
	build.cuda \
	build.opencl \
	build.openblas \
	build.sdist \
	deploy.pypi \
	deploy.gh-docs \
	clean
```
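These targets are thin wrappers around the existing `setup.py` build that select a backend via `CMAKE_ARGS`; for example, `make build.cuda` rebuilds the extension with cuBLAS enabled. The leading `-` on each `clean` recipe line tells Make to ignore a failing command, so cleanup continues even when an artifact is already gone.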

README.md (+11)
````diff
@@ -155,6 +155,17 @@ To get started, clone the repository and install the package in development mode
 
 ```bash
 git clone --recurse-submodules git@github.com:abetlen/llama-cpp-python.git
+
+# Install with pip
+pip install -e .
+
+# if you want to use the fastapi / openapi server
+pip install -e .[server]
+
+# If you're a poetry user, installing will also include a virtual environment
+poetry install --all-extras
+. .venv/bin/activate
+
 # Will need to be re-run any time vendor/llama.cpp is updated
 python3 setup.py develop
 ```
````
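One caveat worth knowing: in shells such as zsh, square brackets are glob characters, so the extras form may need quoting, e.g. `pip install -e '.[server]'`.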

llama_cpp/llama.py (+6 −4)
```diff
@@ -795,20 +795,22 @@ def _create_completion(
                 break
 
             if stream:
+                remaining_tokens = completion_tokens[returned_tokens:]
+                remaining_text = self.detokenize(remaining_tokens)
+                remaining_length = len(remaining_text)
+
                 # We want to avoid yielding any characters from
                 # the generated text if they are part of a stop
                 # sequence.
                 first_stop_position = 0
                 for s in stop_sequences:
-                    for i in range(len(s), 0, -1):
-                        if all_text.endswith(s[:i]):
+                    for i in range(min(len(s), remaining_length), 0, -1):
+                        if remaining_text.endswith(s[:i]):
                             if i > first_stop_position:
                                 first_stop_position = i
                             break
 
                 token_end_position = 0
-                remaining_tokens = completion_tokens[returned_tokens:]
-                remaining_length = len(self.detokenize(remaining_tokens))
                 for token in remaining_tokens:
                     token_end_position += len(self.detokenize([token]))
                     # Check if stop sequence is in the token
```
poetry.lock (+367 −148)

Generated lockfile; diff not rendered.

poetry.toml (new file, +3)
```toml
[virtualenvs]
in-project = true
prefer-active-python = true
```
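With `in-project = true`, Poetry creates the virtual environment in a `.venv/` directory inside the repository, which is what the README's `. .venv/bin/activate` step assumes; `prefer-active-python` makes Poetry build that environment from the Python interpreter already active in the shell.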

pyproject.toml (+7 −1)
```diff
@@ -15,7 +15,9 @@ include = [
 [tool.poetry.dependencies]
 python = "^3.8.1"
 typing-extensions = "^4.5.0"
-
+uvicorn = { version = "^0.21.1", optional = true }
+fastapi = { version = "^0.95.0", optional = true }
+sse-starlette = { version = "^1.3.3", optional = true }
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.3.0"
@@ -25,6 +27,10 @@ mkdocstrings = {extras = ["python"], version = "^0.21.2"}
 mkdocs-material = "^9.1.14"
 pytest = "^7.3.1"
 httpx = "^0.24.1"
+scikit-build = "0.13"
+
+[tool.poetry.extras]
+server = ["uvicorn", "fastapi", "sse-starlette"]
 
 [build-system]
 requires = [
```
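Declaring the server dependencies as optional and grouping them under the `server` extra keeps a plain install lightweight, while `pip install -e .[server]` (or `poetry install --all-extras`) pulls in uvicorn, fastapi, and sse-starlette for the FastAPI server.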
