Commit bbbf0f4

Update llama.cpp
1 parent: 8fc3fa9

File tree

3 files changed: 396 additions & 316 deletions

llama_cpp/llama.py

17 additions & 13 deletions
@@ -371,8 +371,8 @@ def __init__(
             sorted=sorted,
         )
         self._candidates = candidates
-        self._token_nl = Llama.token_nl()
-        self._token_eos = Llama.token_eos()
+        self._token_nl = self.token_nl()
+        self._token_eos = self.token_eos()
         self._candidates_data_id = np.arange(self._n_vocab, dtype=np.intc)  # type: ignore
         self._candidates_data_p = np.zeros(self._n_vocab, dtype=np.single)

@@ -450,10 +450,14 @@ def detokenize(self, tokens: List[int]) -> bytes:
         """
         assert self.ctx is not None
         output = b""
+        buffer_size = 32
+        buffer = (ctypes.c_char * buffer_size)()
         for token in tokens:
-            output += llama_cpp.llama_token_to_str(
-                self.ctx, llama_cpp.llama_token(token)
+            n = llama_cpp.llama_token_to_str(
+                self.ctx, llama_cpp.llama_token(token), buffer, buffer_size
             )
+            assert n <= buffer_size
+            output += bytes(buffer[:n])
         return output

     def set_cache(self, cache: Optional[BaseLlamaCache]):
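
The detokenize change tracks an upstream llama.cpp API change: llama_token_to_str no longer returns the token's bytes directly but writes them into a caller-supplied buffer and returns the number of bytes written. A minimal sketch of the same pattern as a standalone function, assuming a loaded context `ctx` and the four-argument binding shown in this diff:

    import ctypes

    import llama_cpp

    def detokenize(ctx, tokens) -> bytes:
        # Reusable fixed-size buffer; the diff assumes 32 bytes is enough
        # to hold any single token's text.
        buffer_size = 32
        buffer = (ctypes.c_char * buffer_size)()
        output = b""
        for token in tokens:
            # The binding fills `buffer` and returns how many bytes it wrote.
            n = llama_cpp.llama_token_to_str(
                ctx, llama_cpp.llama_token(token), buffer, buffer_size
            )
            assert n <= buffer_size
            output += bytes(buffer[:n])
        return output

Reusing one buffer across the loop avoids allocating a Python bytes object per token; the assert guards the 32-byte assumption rather than silently truncating.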
@@ -1681,20 +1685,20 @@ def tokenizer(self) -> "LlamaTokenizer":
         assert self.ctx is not None
         return LlamaTokenizer(self)

-    @staticmethod
-    def token_eos() -> int:
+    def token_eos(self) -> int:
         """Return the end-of-sequence token."""
-        return llama_cpp.llama_token_eos()
+        assert self.ctx is not None
+        return llama_cpp.llama_token_eos(self.ctx)

-    @staticmethod
-    def token_bos() -> int:
+    def token_bos(self) -> int:
         """Return the beginning-of-sequence token."""
-        return llama_cpp.llama_token_bos()
+        assert self.ctx is not None
+        return llama_cpp.llama_token_bos(self.ctx)

-    @staticmethod
-    def token_nl() -> int:
+    def token_nl(self) -> int:
         """Return the newline token."""
-        return llama_cpp.llama_token_nl()
+        assert self.ctx is not None
+        return llama_cpp.llama_token_nl(self.ctx)

     @staticmethod
     def logits_to_logprobs(logits: List[float]) -> List[float]:
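
The token_eos/token_bos/token_nl changes follow the same upstream shift: llama.cpp now looks these special-token ids up from the context, so the wrappers become instance methods, which is also why __init__ above caches self.token_nl() and self.token_eos() instead of calling them on the class. A hedged usage sketch — the model path is a placeholder, not a file shipped with this commit:

    from llama_cpp import Llama

    # Hypothetical model path for illustration; substitute a real ggml model.
    llm = Llama(model_path="./models/7B/ggml-model.bin")

    # Special-token ids are read from the loaded context, so these are
    # instance calls now; the old static Llama.token_eos() form no longer works.
    print(llm.token_bos(), llm.token_eos(), llm.token_nl())

    # Round trip through the buffer-based detokenize shown above.
    print(llm.detokenize(llm.tokenize(b"Hello, world")))

Callers that previously used the static methods without constructing a Llama must now hold an instance, since there is no context to query otherwise.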
