Commit 8628db8

Merge branch 'abetlen:main' into main

2 parents: 20c965a + 3c7501b

5 files changed: +34 −29 lines

llama_cpp/_internals.py

+4 −4 (4 additions, 4 deletions)
@@ -179,11 +179,11 @@ def token_eot(self) -> int:
         assert self.model is not None
         return llama_cpp.llama_token_eot(self.model)
 
-    def add_bos_token(self) -> int:
+    def add_bos_token(self) -> bool:
         assert self.model is not None
         return llama_cpp.llama_add_bos_token(self.model)
 
-    def add_eos_token(self) -> int:
+    def add_eos_token(self) -> bool:
         assert self.model is not None
         return llama_cpp.llama_add_eos_token(self.model)
 
@@ -691,8 +691,8 @@ def _detokenize_bpe(model: _LlamaModel, tokens: List[int]) -> str:
 def _should_add_bos(model: _LlamaModel) -> bool:
     assert model.model is not None
     add_bos = llama_cpp.llama_add_bos_token(model.model)
-    if add_bos != -1:
-        return add_bos != 0
+    if add_bos:
+        return add_bos
     else:
         return llama_cpp.llama_vocab_type(model.model) == llama_cpp.LLAMA_VOCAB_TYPE_SPM
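Since llama_add_bos_token no longer reports an "unknown" (-1) state, the fallback branch in _should_add_bos now runs whenever the metadata flag is False. A minimal, self-contained sketch of that logic with the llama.cpp calls replaced by plain values; the constants and function name below are illustrative stand-ins, not the real bindings:

    VOCAB_TYPE_SPM = 1   # stand-in for llama_cpp.LLAMA_VOCAB_TYPE_SPM
    VOCAB_TYPE_BPE = 2   # stand-in for llama_cpp.LLAMA_VOCAB_TYPE_BPE

    def should_add_bos(add_bos: bool, vocab_type: int) -> bool:
        # Trust the model metadata when it asks for a BOS token; otherwise fall
        # back to the heuristic that SentencePiece (SPM) vocabularies expect one.
        if add_bos:
            return add_bos
        return vocab_type == VOCAB_TYPE_SPM

    print(should_add_bos(True, VOCAB_TYPE_BPE))   # True  (metadata wins)
    print(should_add_bos(False, VOCAB_TYPE_SPM))  # True  (SPM fallback)
    print(should_add_bos(False, VOCAB_TYPE_BPE))  # False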

llama_cpp/llama.py

+13 −10 (13 additions, 10 deletions)
@@ -198,6 +198,7 @@ def __init__(
             A Llama instance.
         """
         self.verbose = verbose
+        self._stack = contextlib.ExitStack()
 
         set_verbose(verbose)
 
@@ -365,8 +366,6 @@ def __init__(
         if not os.path.exists(model_path):
             raise ValueError(f"Model path does not exist: {model_path}")
 
-        self._stack = contextlib.ExitStack()
-
         self._model = self._stack.enter_context(
             contextlib.closing(
                 _LlamaModel(
@@ -420,6 +419,15 @@ def __init__(
                 raise RuntimeError(
                     f"Failed to initialize LoRA adapter from lora path: {self.lora_path}"
                 )
+
+            def free_lora_adapter():
+                if self._lora_adapter is None:
+                    return
+                llama_cpp.llama_lora_adapter_free(self._lora_adapter)
+                self._lora_adapter = None
+
+            self._stack.callback(free_lora_adapter)
+
             assert self._ctx.ctx is not None
             if llama_cpp.llama_lora_adapter_set(
                 self._ctx.ctx, self._lora_adapter, self.lora_scale
@@ -1058,13 +1066,13 @@ def _create_completion(
 
         if (
             (isinstance(prompt, list) and suffix is None)
-            or self._model.add_bos_token() == 0
+            or not self._model.add_bos_token()
            or bos_tokens[:1] == [-1]
         ):
             bos_tokens = []
 
         if (isinstance(prompt, list) and suffix is None) or (
-            self._model.add_eos_token() != 1 and sep_token_id == -1
+            not self._model.add_eos_token() and sep_token_id == -1
         ):
             eos_tokens = []
 
@@ -2090,14 +2098,9 @@ def pooling_type(self) -> str:
 
     def close(self) -> None:
         """Explicitly free the model from memory."""
-        if hasattr(self,'_stack'):
-            if self._stack is not None:
-                self._stack.close()
+        self._stack.close()
 
     def __del__(self) -> None:
-        if hasattr(self,'_lora_adapter'):
-            if self._lora_adapter is not None:
-                llama_cpp.llama_lora_adapter_free(self._lora_adapter)
         self.close()
 
     @staticmethod
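The change above moves all cleanup behind a single contextlib.ExitStack that is created before any native resource, so close() and __del__ no longer need hasattr guards or direct llama_lora_adapter_free calls. A minimal, self-contained sketch of that pattern; the Holder and _FakeAdapter classes below are illustrative, not part of llama-cpp-python:

    import contextlib

    class _FakeAdapter:
        """Stand-in for a native handle such as a LoRA adapter."""
        def free(self):
            print("adapter freed")

    class Holder:
        def __init__(self):
            # Create the stack first so close()/__del__ are always safe to call,
            # even if a later step of __init__ raises.
            self._stack = contextlib.ExitStack()
            self._adapter = _FakeAdapter()

            def free_adapter():
                if self._adapter is None:
                    return
                self._adapter.free()
                self._adapter = None

            # Callbacks run in reverse registration order when the stack closes.
            self._stack.callback(free_adapter)

        def close(self) -> None:
            self._stack.close()

        def __del__(self) -> None:
            self.close()

    h = Holder()
    h.close()   # prints "adapter freed"; calling close() again is a no-op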

llama_cpp/llama_cpp.py

+12 −14 (12 additions, 14 deletions)
@@ -314,6 +314,8 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 #     LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
 #     LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
 #     LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
+#     LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
+#     LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -338,20 +340,20 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20
 LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21
 LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22
+LLAMA_VOCAB_PRE_TYPE_BLOOM = 23
+LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24
 
 
 # // note: these values should be synchronized with ggml_rope
 # // TODO: maybe move this enum to ggml.h (ggml_rope_type)
 # enum llama_rope_type {
 #     LLAMA_ROPE_TYPE_NONE = -1,
 #     LLAMA_ROPE_TYPE_NORM = 0,
-#     LLAMA_ROPE_TYPE_NEOX = 2,
-#     LLAMA_ROPE_TYPE_GLM = 4,
+#     LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX,
 # };
 LLAMA_ROPE_TYPE_NONE = -1
 LLAMA_ROPE_TYPE_NORM = 0
-LLAMA_ROPE_TYPE_NEOX = 2
-LLAMA_ROPE_TYPE_GLM = 4
+LLAMA_ROPE_TYPE_NEOX = GGML_ROPE_TYPE_NEOX = 2
 
 
 # enum llama_token_type { //TODO: remove, required until per token attributes are available from GGUF file
@@ -2741,19 +2743,15 @@ def llama_token_nl(model: llama_model_p, /) -> int:
     ...
 
 
-# // Returns -1 if unknown, 1 for true or 0 for false.
-# LLAMA_API int32_t llama_add_bos_token(const struct llama_model * model);
-@ctypes_function("llama_add_bos_token", [llama_model_p_ctypes], ctypes.c_int32)
-def llama_add_bos_token(model: llama_model_p, /) -> int:
-    """Returns -1 if unknown, 1 for true or 0 for false."""
+# LLAMA_API bool llama_add_bos_token(const struct llama_model * model);
+@ctypes_function("llama_add_bos_token", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_add_bos_token(model: llama_model_p, /) -> bool:
     ...
 
 
-# // Returns -1 if unknown, 1 for true or 0 for false.
-# LLAMA_API int32_t llama_add_eos_token(const struct llama_model * model);
-@ctypes_function("llama_add_eos_token", [llama_model_p_ctypes], ctypes.c_int32)
-def llama_add_eos_token(model: llama_model_p, /) -> int:
-    """Returns -1 if unknown, 1 for true or 0 for false."""
+# LLAMA_API bool llama_add_eos_token(const struct llama_model * model);
+@ctypes_function("llama_add_eos_token", [llama_model_p_ctypes], ctypes.c_bool)
+def llama_add_eos_token(model: llama_model_p, /) -> bool:
     ...
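The binding change swaps the restype of llama_add_bos_token / llama_add_eos_token from c_int32 (a -1/0/1 tri-state) to c_bool, so callers receive a plain Python bool. A small, self-contained illustration of what that restype difference means at the ctypes level, using CFUNCTYPE-wrapped Python callables in place of real C functions; all names here are hypothetical, not from the bindings:

    import ctypes

    # A "C" function declared to return c_bool hands Python a plain True/False...
    BoolFn = ctypes.CFUNCTYPE(ctypes.c_bool, ctypes.c_int)
    # ...whereas a c_int32 return is just an integer the caller must interpret.
    IntFn = ctypes.CFUNCTYPE(ctypes.c_int32, ctypes.c_int)

    add_bos_new_style = BoolFn(lambda model: True)   # new-style bool API
    add_bos_old_style = IntFn(lambda model: -1)      # old tri-state: -1 unknown, 0 false, 1 true

    print(add_bos_new_style(0))   # True, usable directly in an `if`
    flag = add_bos_old_style(0)
    print(flag != 0 if flag != -1 else "fall back to vocab heuristic")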

pyproject.toml

+4 −0 (4 additions, 0 deletions)
@@ -41,6 +41,10 @@ test = [
     "pytest>=7.4.0",
     "httpx>=0.24.1",
     "scipy>=1.10",
+    "fastapi>=0.100.0",
+    "sse-starlette>=1.6.1",
+    "starlette-context>=0.3.6,<0.4",
+    "pydantic-settings>=2.0.1",
 ]
 dev = [
     "black>=23.3.0",

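The server-related packages are added to the test extra, presumably so the server test suite can run in CI without a separate install step. A tiny, hypothetical sanity check, assuming the project was installed with that extra (for example via pip install -e '.[test]'), that the new dependencies resolve to importable modules:

    import importlib.util

    # Each distribution added to the `test` extra above should now be importable.
    for module in ("fastapi", "sse_starlette", "starlette_context", "pydantic_settings"):
        assert importlib.util.find_spec(module) is not None, f"missing test dependency: {module}"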
vendor/llama.cpp (submodule update)
