4 files changed: +20 −16 lines changed
@@ -355,16 +355,15 @@ def __init__(
             verbose=self.verbose,
         )

-        if self.lora_path:
-            if self._model.apply_lora_from_file(
-                self.lora_path,
-                self.lora_scale,
-                self.lora_base,
-                self.n_threads,
-            ):
-                raise RuntimeError(
-                    f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}"
-                )
+        if self.lora_path and self._model.apply_lora_from_file(
+            self.lora_path,
+            self.lora_scale,
+            self.lora_base,
+            self.n_threads,
+        ):
+            raise RuntimeError(
+                f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}"
+            )

         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
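Note on the hunk above: the refactor collapses the nested if into a single condition. Because Python's `and` short-circuits, `apply_lora_from_file` is only invoked when `lora_path` is set, and a non-zero return code still raises. A minimal sketch of the same pattern, using a stubbed model object rather than the real llama.cpp bindings (the path and arguments below are placeholders):

class FakeModel:
    def apply_lora_from_file(self, path, scale, base, n_threads):
        # Stand-in for the real binding; return a non-zero code to simulate failure.
        return 1

lora_path = "adapter.gguf"  # placeholder path for illustration
model = FakeModel()

try:
    # `and` short-circuits: the method is never called when lora_path is falsy,
    # and a truthy return code is treated as an error.
    if lora_path and model.apply_lora_from_file(lora_path, 1.0, None, 4):
        raise RuntimeError(f"Failed to apply LoRA from lora path: {lora_path}")
except RuntimeError as err:
    print(err)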
@@ -450,7 +449,7 @@ def __init__(
         if self.verbose:
             print(f"Using fallback chat format: {chat_format}", file=sys.stderr)

-    def _load_control_vector(self, filepath: str, strength: float = 1.9):
+    def _load_control_vector(self, filepath: str, strength: float = 1.7):
         if not os.path.exists(filepath):
             raise ValueError(f"Control vector file does not exist: {filepath}")
         if not filepath.endswith(".json"):
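Only the default strength changes here (1.9 to 1.7). For context, a standalone sketch of the validation guard visible in the surrounding lines; the error message for the `.json` check and the loading step are assumptions, since the hunk cuts off before them:

import json
import os

def load_control_vector(filepath: str, strength: float = 1.7):
    # Guards mirrored from the hunk above.
    if not os.path.exists(filepath):
        raise ValueError(f"Control vector file does not exist: {filepath}")
    if not filepath.endswith(".json"):
        # Assumed message: the original error text is not shown in the hunk.
        raise ValueError(f"Control vector file must be JSON: {filepath}")
    # Assumed loading step: parse the JSON and keep the strength for later scaling.
    with open(filepath) as f:
        data = json.load(f)
    return data, strength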
@@ -77,13 +77,13 @@ def create_completion_with_cache(
         **kwargs,
     ):
         """Predict the given prompt with the given max tokens and cache the result."""
-
+
         if not stop_tokens:
             stop_tokens = ["</s>"]

         print(prompt)
         prompt = prompt.strip()
-
+
         if self.current_state == prompt:
             print("Prompt is the same as previous. Assuming new turn")
             # self.reset()
@@ -96,7 +96,7 @@ def create_completion_with_cache(
             print("Not using cached state")
             partial_prompt = prompt
             self.reset()
-
+
         if to_eval:
             print("Evaluating partial prompt")
             prompt_tokens = self.get_prompt_tokens(partial_prompt)
@@ -119,7 +119,7 @@ def create_completion_with_cache(
         for output in outputs:
             yield output
             results += output["choices"][0]["text"]
-
+
         self.current_state = prompt + results

         if self.flush_cache:
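These hunks only touch whitespace, but the surrounding code outlines the caching strategy: compare the incoming prompt against the cached current_state, evaluate only the new portion, stream the outputs, and record prompt plus generated text as the next state. A simplified, self-contained sketch of that flow (the real LlamaCacheManager internals differ):

class PromptCache:
    """Toy prefix cache modelled on the flow shown above."""

    def __init__(self):
        self.current_state = ""

    def complete(self, prompt, generate):
        prompt = prompt.strip()
        if self.current_state and prompt.startswith(self.current_state):
            # Cache hit: only the new suffix needs evaluating.
            partial_prompt = prompt[len(self.current_state):]
        else:
            # Cache miss: start from a clean state.
            self.current_state = ""
            partial_prompt = prompt

        results = ""
        for piece in generate(partial_prompt):
            results += piece
            yield piece

        # Remember everything seen so the next call can reuse the prefix.
        self.current_state = prompt + results

For example, "".join(PromptCache().complete("Hello", lambda p: [" world"])) yields " world" and leaves current_state set to "Hello world".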
@@ -11,6 +11,7 @@
 from llama_cpp.server.settings import ModelSettings
 from llama_cpp.managers.cache import LlamaCacheManager

+
 class LlamaProxy:
     def __init__(self, models: List[ModelSettings]) -> None:
         assert models, "No models provided!"
@@ -134,6 +135,10 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
     else:
         create_fn = LlamaCacheManager
     kwargs["model_path"] = settings.model
+    # kwargs["control_vectors"] = [
+    #     "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/hearts_system/hearts_system.json",
+    #     "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/hearts_system/hearts_system_critical.json"
+    # ]

     _model = create_fn(
         **kwargs,
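The second hunk leaves the control_vectors kwarg commented out; whether and how it is consumed downstream is not shown. As a rough sketch of the surrounding pattern (pick a constructor, collect kwargs, forward them), with illustrative names since the actual settings fields are not visible here:

def build_model(settings, llama_cls, cache_manager_cls):
    # Choose which factory builds the model; `use_cache` is an assumed flag.
    create_fn = cache_manager_cls if settings.get("use_cache") else llama_cls

    kwargs = {"model_path": settings["model"]}
    # Control vectors could be forwarded the same way; the diff keeps this
    # commented out, so treat it as optional and unconfirmed.
    if settings.get("control_vectors"):
        kwargs["control_vectors"] = settings["control_vectors"]

    return create_fn(**kwargs)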
@@ -1,5 +1,5 @@
 python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-john-doe-ckpt-158-200.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000

-python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-instruct-v0.2-v0.0.1.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
+python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-instruct-v0.2-v0.0.1.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000 --host 0.0.0.0

 python3 llama_cpp/server --model "../data/mistral-fwd-john-doe-ckpt-158-200.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
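With --host 0.0.0.0 the server listens on all interfaces rather than only localhost. A quick smoke test against a running instance, assuming the OpenAI-compatible /v1/completions endpoint and the default port 8000 that llama_cpp.server normally uses:

import json
import urllib.request

payload = json.dumps({"prompt": "Hello", "max_tokens": 16}).encode("utf-8")
req = urllib.request.Request(
    "http://127.0.0.1:8000/v1/completions",  # adjust host/port to match your server
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["choices"][0]["text"])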