Commit 16fc5b5
Author: Mug
1 parent: 10c7571

More interoperability to the original llama.cpp, and arguments now work

File tree

4 files changed (+55 / -43 lines)

examples/__init__.py

Whitespace-only changes.

examples/low_level_api/__init__.py

Whitespace-only changes.
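The two new empty `__init__.py` files mark `examples` and `examples.low_level_api` as regular Python packages, which keeps the renamed modules importable by package path. A hedged sketch of what that enables (the exact import the chat script uses isn't shown in this diff, so treat the path as illustrative):

```python
# With examples/__init__.py and examples/low_level_api/__init__.py in place,
# the renamed common.py can be reached as a package module from the repo root:
from examples.low_level_api.common import GptParams, gpt_params_parse

args = gpt_params_parse(["-t", "4"])   # returns the parsed argparse Namespace
params = GptParams(**vars(args))       # same pattern the chat example uses
```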

examples/common.py renamed to examples/low_level_api/common.py

+46 / -33 (46 additions, 33 deletions)

```diff
@@ -26,9 +26,6 @@ class GptParams:
     model: str = "./models/llama-7B/ggml-model.bin"
     prompt: str = ""
     input_prefix: str = " "
-    fix_prefix: str = ""
-    output_postfix: str = ""
-    input_echo: bool = True,
 
     antiprompt: List[str] = field(default_factory=list)
 
@@ -47,41 +44,57 @@ class GptParams:
     mem_test: bool = False
     verbose_prompt: bool = False
 
+    file: str = None
+
+    # If chat ended prematurely, append this to the conversation to fix it.
+    # Set to "\nUser:" etc.
+    # This is an alternative to input_prefix, which is always added and so can duplicate "User:".
+    fix_prefix: str = " "
+    output_postfix: str = ""
+    input_echo: bool = True,
+
     # Default instructions for Alpaca
     # switch to "Human" and "Assistant" for Vicuna.
-    instruct_inp_prefix: str="\n\n### Instruction:\n\n",
-    instruct_inp_suffix: str="\n\n### Response:\n\n",
+    # TODO: TBD how they are gonna handle this upstream
+    instruct_inp_prefix: str="\n\n### Instruction:\n\n"
+    instruct_inp_suffix: str="\n\n### Response:\n\n"
 
 
 def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
     if params is None:
         params = GptParams()
 
-    parser = argparse.ArgumentParser()
-    parser.add_argument("-h", "--help", action="store_true", help="show this help message and exit")
-    parser.add_argument("-s", "--seed", type=int, default=-1, help="",dest="seed")
-    parser.add_argument("-t", "--threads", type=int, default=1, help="",dest="n_threads")
-    parser.add_argument("-p", "--prompt", type=str, default="", help="",dest="prompt")
-    parser.add_argument("-f", "--file", type=str, default=None, help="")
-    parser.add_argument("-c", "--ctx_size", type=int, default=512, help="",dest="n_ctx")
-    parser.add_argument("--memory_f32", action="store_false", help="",dest="memory_f16")
-    parser.add_argument("--top_p", type=float, default=0.9, help="",dest="top_p")
-    parser.add_argument("--temp", type=float, default=1.0, help="",dest="temp")
-    parser.add_argument("--repeat_last_n", type=int, default=64, help="",dest="repeat_last_n")
-    parser.add_argument("--repeat_penalty", type=float, default=1.0, help="",dest="repeat_penalty")
-    parser.add_argument("-b", "--batch_size", type=int, default=8, help="",dest="n_batch")
-    parser.add_argument("--keep", type=int, default=0, help="",dest="n_keep")
-    parser.add_argument("-m", "--model", type=str, help="",dest="model")
+    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-s", "--seed", type=int, default=-1, help="RNG seed (use random seed for <= 0)",dest="seed")
+    parser.add_argument("-t", "--threads", type=int, default=min(4, os.cpu_count() or 1), help="number of threads to use during computation",dest="n_threads")
+    parser.add_argument("-p", "--prompt", type=str, default="", help="initial prompt",dest="prompt")
+    parser.add_argument("-f", "--file", type=str, default=None, help="file containing initial prompt to load",dest="file")
+    parser.add_argument("-c", "--ctx_size", type=int, default=512, help="size of the prompt context",dest="n_ctx")
+    parser.add_argument("--memory_f32", action="store_false", help="use f32 instead of f16 for memory key+value",dest="memory_f16")
+    parser.add_argument("--top_p", type=float, default=0.95, help="top-p sampling",dest="top_p")
+    parser.add_argument("--top_k", type=int, default=40, help="top-k sampling",dest="top_k")
+    parser.add_argument("--temp", type=float, default=0.80, help="temperature",dest="temp")
+    parser.add_argument("--n_predict", type=int, default=128, help="number of tokens to predict",dest="n_predict")
+    parser.add_argument("--repeat_last_n", type=int, default=64, help="last n tokens to consider for penalize",dest="repeat_last_n")
+    parser.add_argument("--repeat_penalty", type=float, default=1.10, help="penalize repeat sequence of tokens",dest="repeat_penalty")
+    parser.add_argument("-b", "--batch_size", type=int, default=8, help="batch size for prompt processing",dest="n_batch")
+    parser.add_argument("--keep", type=int, default=0, help="number of tokens to keep from the initial prompt",dest="n_keep")
+    parser.add_argument("-m", "--model", type=str, default="./models/llama-7B/ggml-model.bin", help="model path",dest="model")
     parser.add_argument(
         "-i", "--interactive", action="store_true", help="run in interactive mode", dest="interactive"
     )
     parser.add_argument("--embedding", action="store_true", help="", dest="embedding")
-    parser.add_argument("--interactive-start", action="store_true", help="", dest="interactive_start")
+    parser.add_argument(
+        "--interactive-start",
+        action="store_true",
+        help="run in interactive mode",
+        dest="interactive"
+    )
     parser.add_argument(
         "--interactive-first",
         action="store_true",
         help="run in interactive mode and wait for input right away",
-        dest="interactive"
+        dest="interactive_start"
     )
     parser.add_argument(
         "-ins",
@@ -96,24 +109,24 @@ def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
         help="colorise output to distinguish prompt and user input from generations",
         dest="use_color"
     )
-    parser.add_argument("--mlock", action="store_true",dest="use_mlock")
-    parser.add_argument("--mtest", action="store_true",dest="mem_test")
+    parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock")
+    parser.add_argument("--mtest", action="store_true",help="compute maximum memory usage",dest="mem_test")
     parser.add_argument(
         "-r",
         "--reverse-prompt",
         type=str,
         action='append',
-        help="run in interactive mode and poll user input upon seeing PROMPT (can be\nspecified more than once for multiple prompts).",
+        help="poll user input upon seeing PROMPT (can be\nspecified more than once for multiple prompts).",
         dest="antiprompt"
     )
-    parser.add_argument("--perplexity", action="store_true", help="", dest="perplexity")
-    parser.add_argument("--ignore-eos", action="store_true", help="", dest="ignore_eos")
-    parser.add_argument("--n_parts", type=int, default=-1, help="", dest="n_parts")
-    parser.add_argument("--random-prompt", action="store_true", help="", dest="random_prompt")
-    parser.add_argument("--in-prefix", type=str, default=" ", help="", dest="input_prefix")
-    parser.add_argument("--fix-prefix", type=str, default=" ", help="", dest="fix_prefix")
-    parser.add_argument("--out-postfix", type=str, default="", help="", dest="output_postfix")
-    parser.add_argument("--input-noecho", action="store_false", help="", dest="input_echo")
+    parser.add_argument("--perplexity", action="store_true", help="compute perplexity over the prompt", dest="perplexity")
+    parser.add_argument("--ignore-eos", action="store_true", help="ignore end of stream token and continue generating", dest="ignore_eos")
+    parser.add_argument("--n_parts", type=int, default=-1, help="number of model parts", dest="n_parts")
+    parser.add_argument("--random-prompt", action="store_true", help="start with a randomized prompt", dest="random_prompt")
+    parser.add_argument("--in-prefix", type=str, default="", help="string to prefix user inputs with", dest="input_prefix")
+    parser.add_argument("--fix-prefix", type=str, default="", help="append to input when n_predict tokens have been generated", dest="fix_prefix")
+    parser.add_argument("--out-postfix", type=str, default="", help="append to input", dest="output_postfix")
+    parser.add_argument("--input-noecho", action="store_false", help="don't echo the input", dest="input_echo")
     args = parser.parse_args(argv)
     return args
```

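The "arguments now work" part of the commit message comes down to a naming contract: every `dest` in `gpt_params_parse` now matches a `GptParams` field, so the parsed `argparse.Namespace` can be splatted straight into the dataclass. The old entry point passed the whole Namespace as the first positional field (`GptParams(args)`), which the next file's diff replaces with `GptParams(**vars(args))`. A minimal self-contained sketch of the pattern, trimmed to three of the real flags:

```python
import argparse
from dataclasses import dataclass

@dataclass
class GptParams:
    seed: int = -1
    n_threads: int = 1
    prompt: str = ""

parser = argparse.ArgumentParser()
# Each dest matches a GptParams field name exactly; that is the whole contract.
parser.add_argument("-s", "--seed", type=int, default=-1, dest="seed")
parser.add_argument("-t", "--threads", type=int, default=1, dest="n_threads")
parser.add_argument("-p", "--prompt", type=str, default="", dest="prompt")

args = parser.parse_args(["-t", "4", "-p", "Hello"])
params = GptParams(**vars(args))  # vars() turns the Namespace into {dest: value}
print(params)                     # GptParams(seed=-1, n_threads=4, prompt='Hello')
```

This is also why the hand-rolled `-h/--help` flag could be dropped: argparse's built-in help exits before parsing, and `ArgumentDefaultsHelpFormatter` appends each default to the generated help text.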
examples/low_level_api/low_level_api_chatllama_cpp.py renamed to examples/low_level_api/low_level_api_chat_cpp.py

+9 / -10 (9 additions, 10 deletions)

```diff
@@ -6,8 +6,6 @@
 * The first antiprompt should be the userprompt like "\nUser:",
   because it's added when n_predict is reached (aka generation ended prematurely)
 * n_predict can be set to -1 for unlimited length responses (or just a really high value)
-* It's always in interactive mode, generation ends either by reaching an antiprompt
-  or running out of n_predict.
 * Instruction mode adds its own antiprompt.
   You should also still be feeding the model with a "primer" prompt that
   shows it the expected format.
@@ -59,7 +57,6 @@ def __init__(self, params: GptParams) -> None:
 
         # runtime args
         self.input_consumed = 0
-        self.embd = []
         self.n_past = 0
         self.first_antiprompt = []
         self.remaining_tokens = self.params.n_predict
@@ -74,7 +71,7 @@ def __init__(self, params: GptParams) -> None:
         self.lparams.use_mlock = self.params.use_mlock
 
         self.ctx = llama_cpp.llama_init_from_file(self.params.model.encode("utf8"), self.lparams)
-        if (self.ctx == 0):
+        if (not self.ctx):
             raise RuntimeError(f"error: failed to load model '{self.params.model}'")
 
         print(file=sys.stderr)
@@ -95,7 +92,13 @@ def __init__(self, params: GptParams) -> None:
         # Add a space in front of the first character to match OG llama tokenizer behavior
         self.params.prompt = " " + self.params.prompt
 
+        # Load prompt file
+        if (self.params.file):
+            with open(self.params.file) as f:
+                self.params.prompt = f.read()
+
         # tokenize the prompt
+        self.embd = []
         self.embd_inp = self._tokenize(self.params.prompt)
 
         if (len(self.embd_inp) > self.params.n_ctx - 4):
@@ -384,11 +387,7 @@ def interact(self):
 {AI_NAME}: Blue
 {USER_NAME}:"""
     args = gpt_params_parse()
-    params = GptParams(args)
-
-    if (args.file):
-        with open(args.file) as f:
-            params.prompt = f.read()
+    params = GptParams(**vars(args))
 
-    with LLaMAInteract() as m:
+    with LLaMAInteract(params) as m:
         m.interact()
```
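One behavioral note on the `if (self.ctx == 0)` to `if (not self.ctx)` change: `llama_init_from_file` is a ctypes-bound call, and a failed load typically surfaces as `None` (when the restype is `c_void_p`) or as a NULL pointer object; neither compares equal to the integer 0, but both are falsy, so only the truthiness test actually catches the failure. A minimal sketch of the pitfall using plain ctypes (no llama.cpp types involved):

```python
import ctypes

# Two ways a NULL result can come back from a ctypes-bound C function,
# depending on how its restype is declared:
null_ptr = ctypes.POINTER(ctypes.c_int)()  # a NULL pointer object
null_void = None                           # c_void_p restypes convert NULL to None

for ctx in (null_ptr, null_void):
    print(ctx == 0)  # False both times: never equal to the int 0
    print(not ctx)   # True both times: falsy, so the error path is taken
```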
