Commit 22b26c7

Merge branch 'abetlen:main' into main

2 parents: 207a5c3 + f7b9e6d

9 files changed (+95, -39 lines)

‎CHANGELOG.md

14 additions, 0 deletions

@@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.2.85]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@398ede5efeb07b9adf9fbda7ea63f630d476a792
+- fix: Missing LoRA adapter after API change by @shamitv in #1630
+- fix(docker): Update Dockerfile BLAS options by @olivierdebauche in #1632
+- fix(docker): Fix GGML_CUDA param by @olivierdebauche in #1633
+- fix(docker): Update Dockerfile build options from `LLAMA_` to `GGML_` by @olivierdebauche in #1634
+- feat: FreeBSD compatibility by @yurivict in #1635
+
+## [0.2.84]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@4730faca618ff9cee0780580145e3cbe86f24876
+- fix: fix: Correcting run.sh filepath in Simple Docker implementation by @mashuk999 in #1626
+
 ## [0.2.83]

 - feat: Update llama.cpp to ggerganov/llama.cpp@081fe431aa8fb6307145c4feb3eed4f48cab19f8
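Note (illustrative, not part of this commit): the Docker entries above follow the upstream rename of the llama.cpp build flags from the `LLAMA_` prefix to `GGML_` (for example `-DLLAMA_CUBLAS=on` becomes `-DGGML_CUDA=on`). A minimal Python sketch of passing the new-style flags through `CMAKE_ARGS` when installing the package; the helper name is hypothetical and the flag set depends on your hardware:

# Hypothetical helper: install llama-cpp-python with the renamed GGML_* CMake flags.
import os
import subprocess
import sys

def install_with_flags(cmake_args: str) -> None:
    # pip rebuilds the wheel from source using the provided CMake arguments.
    env = dict(os.environ, CMAKE_ARGS=cmake_args)
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "--upgrade", "--no-cache-dir", "llama-cpp-python"],
        env=env,
    )

if __name__ == "__main__":
    # CUDA build (previously -DLLAMA_CUBLAS=on):
    install_with_flags("-DGGML_CUDA=on")
    # OpenBLAS build (previously -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS):
    # install_with_flags("-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS")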

‎docker/cuda_simple/Dockerfile

2 additions, 2 deletions

@@ -15,13 +15,13 @@ COPY . .

 # setting build related env vars
 ENV CUDA_DOCKER_ARCH=all
-ENV LLAMA_CUBLAS=1
+ENV GGML_CUDA=1

 # Install depencencies
 RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context

 # Install llama-cpp-python (build with cuda)
-RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
+RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python

 # Run the server
 CMD python3 -m llama_cpp.server

‎docker/open_llama/Dockerfile

3 additions, 3 deletions

@@ -20,13 +20,13 @@ RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fa

 # Perform the conditional installations based on the image
 RUN echo "Image: ${IMAGE}" && \
-    if [ "${IMAGE}" = "python:3-slim-bullseye" ] ; then \
+    if [ "${IMAGE}" = "python:3-slim-bookworm" ] ; then \
     echo "OpenBLAS install:" && \
     apt-get install -y --no-install-recommends libopenblas-dev && \
-    LLAMA_OPENBLAS=1 pip install llama-cpp-python --verbose; \
+    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama-cpp-python --verbose; \
     else \
     echo "CuBLAS install:" && \
-    LLAMA_CUBLAS=1 pip install llama-cpp-python --verbose; \
+    CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --verbose; \
     fi

 # Clean up apt cache

‎docker/openblas_simple/Dockerfile

1 addition, 1 deletion

@@ -12,7 +12,7 @@ RUN apt update && apt install -y libopenblas-dev ninja-build build-essential pkg

 RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings starlette-context

-RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose
+RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose

 # Run the server
 CMD python3 -m llama_cpp.server

‎docker/simple/Dockerfile

1 addition, 1 deletion

@@ -35,4 +35,4 @@ ENV PORT=8000
 EXPOSE 8000

 # Run the server start script
-CMD ["/bin/sh", "/app/run.sh"]
+CMD ["/bin/sh", "/app/docker/simple/run.sh"]

‎llama_cpp/__init__.py

1 addition, 1 deletion

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *

-__version__ = "0.2.83"
+__version__ = "0.2.85"

‎llama_cpp/llama.py

6 additions, 3 deletions

@@ -2088,11 +2088,14 @@ def pooling_type(self) -> str:

     def close(self) -> None:
         """Explicitly free the model from memory."""
-        self._stack.close()
+        if hasattr(self,'_stack'):
+            if self._stack is not None:
+                self._stack.close()

     def __del__(self) -> None:
-        if self._lora_adapter is not None:
-            llama_cpp.llama_lora_adapter_free(self._lora_adapter)
+        if hasattr(self,'_lora_adapter'):
+            if self._lora_adapter is not None:
+                llama_cpp.llama_lora_adapter_free(self._lora_adapter)
         self.close()

     @staticmethod
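Note (illustrative, not from this repository): the new `hasattr` guards matter because `close()` and `__del__` can run on a partially constructed object. If `__init__` raises before `_stack` or `_lora_adapter` is assigned, the previous code raised `AttributeError` during garbage collection. A standalone sketch of the pattern with purely hypothetical names:

# Standalone sketch: why destructors guard attributes that __init__ may never set.
class Resource:
    def __init__(self, fail: bool = False):
        if fail:
            raise RuntimeError("init failed before _handle was assigned")
        self._handle = object()  # stands in for an ExitStack or LoRA adapter

    def close(self) -> None:
        # Guard: the attribute is missing if __init__ raised early.
        if getattr(self, "_handle", None) is not None:
            self._handle = None

    def __del__(self) -> None:
        self.close()  # must not assume __init__ completed

try:
    Resource(fail=True)   # the half-built object is still garbage collected
except RuntimeError:
    pass                  # cleanup runs without an AttributeError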

‎llama_cpp/llama_cpp.py

66 additions, 27 deletions

@@ -28,7 +28,7 @@ def _load_shared_library(lib_base_name: str):
     # for llamacpp) and "llama" (default name for this repo)
     _lib_paths: List[pathlib.Path] = []
     # Determine the file extension based on the platform
-    if sys.platform.startswith("linux"):
+    if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
         _lib_paths += [
             _base_path / f"lib{lib_base_name}.so",
         ]
@@ -233,9 +233,6 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # define LLAMA_DEFAULT_SEED 0xFFFFFFFF
 LLAMA_DEFAULT_SEED = 0xFFFFFFFF

-# define LLAMA_MAX_RNG_STATE (64*1024)
-LLAMA_MAX_RNG_STATE = 64 * 1024
-
 # define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 LLAMA_FILE_MAGIC_GGLA = 0x67676C61

@@ -247,13 +244,13 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa

 # define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
-# define LLAMA_SESSION_VERSION 7
-LLAMA_SESSION_VERSION = 7
+# define LLAMA_SESSION_VERSION 8
+LLAMA_SESSION_VERSION = 8

 # define LLAMA_STATE_SEQ_MAGIC LLAMA_FILE_MAGIC_GGSQ
 LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ
-# define LLAMA_STATE_SEQ_VERSION 1
-LLAMA_STATE_SEQ_VERSION = 1
+# define LLAMA_STATE_SEQ_VERSION 2
+LLAMA_STATE_SEQ_VERSION = 2

 # struct llama_model;
 llama_model_p = NewType("llama_model_p", int)
@@ -1583,7 +1580,7 @@ def llama_lora_adapter_set(
     ...


-# // Remove a LoRA adapter from given context
+# // Remove a specific LoRA adapter from given context
 # // Return -1 if the adapter is not present in the context
 # LLAMA_API int32_t llama_lora_adapter_remove(
 #         struct llama_context * ctx,
@@ -1601,6 +1598,19 @@ def llama_lora_adapter_remove(
     ...


+# // Remove all LoRA adapters from given context
+# LLAMA_API void llama_lora_adapter_clear(
+#         struct llama_context * ctx);
+@ctypes_function(
+    "llama_lora_adapter_clear",
+    [llama_context_p_ctypes],
+    None,
+)
+def llama_lora_adapter_clear(ctx: llama_context_p, /):
+    """Remove all LoRA adapters from given context"""
+    ...
+
+
 # // Manually free a LoRA adapter
 # // Note: loaded adapters will be free when the associated model is deleted
 # LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
@@ -1992,17 +2002,17 @@ def llama_kv_cache_update(ctx: llama_context_p, /):
 # //


-# Returns the maximum size in bytes of the state (rng, logits, embedding
-# and kv_cache) - will often be smaller after compacting tokens
-# LLAMA_API size_t llama_state_get_size(const struct llama_context * ctx);
+# // Returns the *actual* size in bytes of the state
+# // (rng, logits, embedding and kv_cache)
+# // Only use when saving the state, not when restoring it, otherwise the size may be too small.
+# LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
 @ctypes_function("llama_state_get_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_state_get_size(ctx: llama_context_p, /) -> int:
-    """Returns the maximum size in bytes of the state (rng, logits, embedding
-    and kv_cache) - will often be smaller after compacting tokens"""
+    """Returns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokens"""
     ...


-# LLAMA_API DEPRECATED(size_t llama_get_state_size(const struct llama_context * ctx),
+# LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
 #     "use llama_state_get_size instead");
 @ctypes_function("llama_get_state_size", [llama_context_p_ctypes], ctypes.c_size_t)
 def llama_get_state_size(ctx: llama_context_p, /) -> int:
@@ -2011,22 +2021,27 @@ def llama_get_state_size(ctx: llama_context_p, /) -> int:
     ...


-# Copies the state to the specified destination address.
-# Destination needs to have allocated enough memory.
-# Returns the number of bytes copied
+# // Copies the state to the specified destination address.
+# // Destination needs to have allocated enough memory.
+# // Returns the number of bytes copied
 # LLAMA_API size_t llama_state_get_data(
 #         struct llama_context * ctx,
-#         uint8_t * dst);
+#         uint8_t * dst,
+#         size_t size);
 @ctypes_function(
     "llama_state_get_data",
     [
         llama_context_p_ctypes,
         ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
     ],
     ctypes.c_size_t,
 )
 def llama_state_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Copies the state to the specified destination address.
     Destination needs to have allocated enough memory.
@@ -2059,14 +2074,18 @@ def llama_copy_state_data(
 # // Returns the number of bytes read
 # LLAMA_API size_t llama_state_set_data(
 #         struct llama_context * ctx,
-#         const uint8_t * src);
+#         const uint8_t * src,
+#         size_t size);
 @ctypes_function(
     "llama_state_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8)],
+    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), ctypes.c_size_t],
     ctypes.c_size_t,
 )
 def llama_state_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    /,
 ) -> int:
     """Set the state reading from the specified address
     Returns the number of bytes read"""
@@ -2216,14 +2235,24 @@ def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id, /) -> i
 # LLAMA_API size_t llama_state_seq_get_data(
 #         struct llama_context * ctx,
 #         uint8_t * dst,
+#         size_t size,
 #         llama_seq_id seq_id);
 @ctypes_function(
     "llama_state_seq_get_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_get_data(
-    ctx: llama_context_p, dst: CtypesArray[ctypes.c_uint8], seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    dst: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the KV cache of a single sequence into the specified buffer"""
     ...
@@ -2236,14 +2265,24 @@ def llama_state_seq_get_data(
 # LLAMA_API size_t llama_state_seq_set_data(
 #         struct llama_context * ctx,
 #         const uint8_t * src,
+#         size_t size,
 #         llama_seq_id dest_seq_id);
 @ctypes_function(
     "llama_state_seq_set_data",
-    [llama_context_p_ctypes, ctypes.POINTER(ctypes.c_uint8), llama_seq_id],
+    [
+        llama_context_p_ctypes,
+        ctypes.POINTER(ctypes.c_uint8),
+        ctypes.c_size_t,
+        llama_seq_id,
+    ],
     ctypes.c_size_t,
 )
 def llama_state_seq_set_data(
-    ctx: llama_context_p, src: CtypesArray[ctypes.c_uint8], dest_seq_id: llama_seq_id, /
+    ctx: llama_context_p,
+    src: CtypesArray[ctypes.c_uint8],
+    size: Union[ctypes.c_size_t, int],
+    dest_seq_id: llama_seq_id,
+    /,
 ) -> int:
     """Copy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequence"""
     ...
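Note (sketch only): for callers of these low-level bindings, the practical change is that the state snapshot functions now take an explicit buffer size, and a `llama_lora_adapter_clear` binding is available. A rough usage sketch assuming `ctx` is an already-initialized `llama_context_p` (error handling omitted):

# Sketch using the updated bindings shown in this diff; `ctx` must come from an
# existing context created elsewhere via the llama_cpp API.
import ctypes
import llama_cpp

def snapshot_state(ctx) -> bytes:
    # The size is only valid at save time; it may shrink as tokens are compacted.
    size = llama_cpp.llama_state_get_size(ctx)
    buf = (ctypes.c_uint8 * size)()
    written = llama_cpp.llama_state_get_data(ctx, buf, size)  # new explicit size argument
    return bytes(buf[:written])

def restore_state(ctx, blob: bytes) -> int:
    buf = (ctypes.c_uint8 * len(blob)).from_buffer_copy(blob)
    return llama_cpp.llama_state_set_data(ctx, buf, len(blob))

def clear_lora(ctx) -> None:
    llama_cpp.llama_lora_adapter_clear(ctx)  # binding added in this commit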

‎vendor/llama.cpp


0 commit comments