Commit 63d8a3c

Merge pull request abetlen#63 from SagsMug/main
Low level chat: Added iterative search to prevent instructions from being echoed
2 parents: 241d608 + 0cccb41

2 files changed: 35 additions & 4 deletions

examples/low_level_api/common.py

3 additions & 0 deletions
@@ -40,6 +40,7 @@ class GptParams:
     instruct: bool = False
     ignore_eos: bool = False
     perplexity: bool = False
+    use_mmap: bool = True
     use_mlock: bool = False
     mem_test: bool = False
     verbose_prompt: bool = False
@@ -110,7 +111,9 @@ def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
         dest="use_color"
     )
     parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock")
+    parser.add_argument("--no-mmap", action="store_false",help="do not memory-map model (slower load but may reduce pageouts if not using mlock)",dest="use_mmap")
     parser.add_argument("--mtest", action="store_true",help="compute maximum memory usage",dest="mem_test")
+    parser.add_argument("--verbose-prompt", action="store_true",help="print prompt before generation",dest="verbose_prompt")
     parser.add_argument(
         "-r",
         "--reverse-prompt",

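Note on the new flag: --no-mmap uses argparse's store_false action, so use_mmap stays True (the new GptParams default) unless the flag is passed. A minimal standalone sketch of that behaviour, not the project's gpt_params_parse:

import argparse

# store_false flags default to True, matching GptParams.use_mmap = True above
parser = argparse.ArgumentParser()
parser.add_argument("--no-mmap", action="store_false", dest="use_mmap",
                    help="do not memory-map model")

print(parser.parse_args([]).use_mmap)             # True  -> model stays memory-mapped
print(parser.parse_args(["--no-mmap"]).use_mmap)  # False -> mmap disabled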
examples/low_level_api/low_level_api_chat_cpp.py

32 additions & 4 deletions
@@ -26,6 +26,25 @@
 CONSOLE_COLOR_PROMPT = ANSI_COLOR_YELLOW
 CONSOLE_COLOR_USER_INPUT = ANSI_BOLD + ANSI_COLOR_GREEN
 
+# Iterative search
+# Actively searches and prevents a pattern from being returned
+class IterSearch:
+    def __init__(self, pattern):
+        self.pattern = list(pattern)
+        self.buffer = []
+
+    def __call__(self, char):
+        self.buffer += [char]
+
+        if (self.pattern[:len(self.buffer)] == self.buffer):
+            if (len(self.buffer) >= len(self.pattern)):
+                self.buffer.clear()
+            return []
+
+        _tmp = self.buffer[:]
+        self.buffer.clear()
+        return _tmp
+
 # A LLaMA interactive session
 class LLaMAInteract:
     def __init__(self, params: GptParams) -> None:
@@ -69,6 +88,7 @@ def __init__(self, params: GptParams) -> None:
         self.lparams.seed = self.params.seed
         self.lparams.memory_f16 = self.params.memory_f16
         self.lparams.use_mlock = self.params.use_mlock
+        self.lparams.use_mmap = self.params.use_mmap
 
         self.ctx = llama_cpp.llama_init_from_file(self.params.model.encode("utf8"), self.lparams)
         if (not self.ctx):
@@ -114,7 +134,9 @@ def __init__(self, params: GptParams) -> None:
         # in instruct mode, we inject a prefix and a suffix to each input by the user
         if (self.params.instruct):
             self.params.interactive_start = True
-            self.first_antiprompt.append(self._tokenize(self.params.instruct_inp_prefix.strip(), False))
+            _ptn = self._tokenize(self.params.instruct_inp_prefix.strip(), False)
+            self.first_antiprompt.append(_ptn)
+            self.antiecho = IterSearch(_ptn)
 
         # enable interactive mode if reverse prompt or interactive start is specified
         if (len(self.params.antiprompt) != 0 or self.params.interactive_start):
@@ -217,7 +239,9 @@ def generate(self):
         if len(self.embd_inp) <= self.input_consumed:
             # out of user input, sample next token
 
-            #TODO: self.params.ignore_eos
+            if (self.params.ignore_eos):
+                logits = llama_cpp.llama_get_logits(self.ctx)
+                logits[llama_cpp.llama_token_eos()] = llama_cpp.c_float(0)
 
             _arr = self.last_n_tokens[-min(self.params.repeat_last_n, self.n_past):]
             id = llama_cpp.llama_sample_top_p_top_k(
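The ignore_eos branch above applies a common sampling trick: rewrite the end-of-text token's logit before sampling so the model keeps generating. A standalone numpy sketch of the idea, not the llama_cpp API (the diff writes 0.0 into the raw logits buffer returned by llama_get_logits; the hard-mask variant shown here uses -inf instead):

import numpy as np

def sample_without(logits: np.ndarray, banned_id: int) -> int:
    masked = logits.copy()
    masked[banned_id] = -np.inf                 # banned token gets probability 0
    probs = np.exp(masked - masked.max())       # softmax over the remaining tokens
    probs /= probs.sum()
    return int(np.random.choice(len(probs), p=probs))

logits = np.array([1.0, 3.5, 0.2, 2.9])         # made-up 4-token vocabulary
eos_id = 1
token = sample_without(logits, eos_id)          # never returns eos_id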
@@ -263,7 +287,11 @@ def generate(self):
             # display tokens
             if self.output_echo:
                 for id in self.embd:
-                    yield id
+                    if self.params.instruct:
+                        for r in self.antiecho(id):
+                            yield r
+                    else:
+                        yield id
 
             # reset color to default if we there is no pending user input
             if (self.params.input_echo and len(self.embd_inp) == self.input_consumed):
@@ -279,7 +307,7 @@ def generate(self):
                     break
 
             # if we are using instruction mode, and we have processed the initial prompt
-            if (self.n_past > 0 and self.params.interactive_start):
+            if (self.params.interactive_start):
                 break
 
             # end of text token
