Commit 0d34e84: "new version"
1 parent 5a57d7f

4 files changed: 20 additions, 16 deletions

llama_cpp/llama.py

10 additions & 11 deletions

@@ -355,16 +355,15 @@ def __init__(
             verbose=self.verbose,
         )
 
-        if self.lora_path:
-            if self._model.apply_lora_from_file(
-                self.lora_path,
-                self.lora_scale,
-                self.lora_base,
-                self.n_threads,
-            ):
-                raise RuntimeError(
-                    f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}"
-                )
+        if self.lora_path and self._model.apply_lora_from_file(
+            self.lora_path,
+            self.lora_scale,
+            self.lora_base,
+            self.n_threads,
+        ):
+            raise RuntimeError(
+                f"Failed to apply LoRA from lora path: {self.lora_path} to base path: {self.lora_base}"
+            )
 
         if self.verbose:
             print(llama_cpp.llama_print_system_info().decode("utf-8"), file=sys.stderr)
@@ -450,7 +449,7 @@ def __init__(
         if self.verbose:
             print(f"Using fallback chat format: {chat_format}", file=sys.stderr)
 
-    def _load_control_vector(self, filepath: str, strength: float = 1.9):
+    def _load_control_vector(self, filepath: str, strength: float = 1.7):
         if not os.path.exists(filepath):
            raise ValueError(f"Control vector file does not exist: {filepath}")
        if not filepath.endswith(".json"):
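
As a point of reference, here is a minimal sketch of the code path the refactored branch covers: constructing a Llama instance with the LoRA-related kwargs that upstream llama-cpp-python exposes. The model and adapter paths are placeholders, not files from this commit.

# Minimal sketch, not part of this commit: kwargs follow the upstream
# llama_cpp.Llama constructor; the .gguf and adapter paths are placeholders.
from llama_cpp import Llama

llm = Llama(
    model_path="models/base-model.gguf",   # placeholder base model
    lora_path="adapters/my-adapter.bin",   # placeholder LoRA adapter
    lora_scale=1.0,                        # scale applied when merging the adapter
    n_ctx=8192,
)
# With the combined condition above, a falsy lora_path skips the call entirely,
# and a non-zero return from apply_lora_from_file() still raises RuntimeError.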

llama_cpp/managers/cache.py

4 additions & 4 deletions

@@ -77,13 +77,13 @@ def create_completion_with_cache(
         **kwargs,
     ):
         """Predict the given prompt with the given max tokens and cache the result."""
-
+
         if not stop_tokens:
             stop_tokens = ["</s>"]
 
         print(prompt)
         prompt = prompt.strip()
-
+
         if self.current_state == prompt:
             print("Prompt is the same as previous. Assuming new turn")
             # self.reset()
@@ -96,7 +96,7 @@ def create_completion_with_cache(
             print("Not using cached state")
             partial_prompt = prompt
             self.reset()
-
+
         if to_eval:
             print("Evaluating partial prompt")
             prompt_tokens = self.get_prompt_tokens(partial_prompt)
@@ -119,7 +119,7 @@ def create_completion_with_cache(
         for output in outputs:
             yield output
             results += output["choices"][0]["text"]
-
+
         self.current_state = prompt + results
 
         if self.flush_cache:
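
The whitespace-only hunks above fall inside the prefix-reuse logic of create_completion_with_cache: the manager remembers prompt + results as current_state and, on the next call, only the text the new prompt adds on top of that state needs to be evaluated again. A rough illustration of that idea follows; split_for_cache is a hypothetical helper invented for this sketch, not a method of LlamaCacheManager.

# Rough illustration of the prefix-reuse idea; split_for_cache is a
# hypothetical helper, not LlamaCacheManager's actual code.
def split_for_cache(current_state: str, prompt: str) -> tuple[str, bool]:
    prompt = prompt.strip()
    if current_state and prompt.startswith(current_state):
        # The cached KV state already covers current_state, so only the new
        # suffix has to be evaluated before generation continues.
        return prompt[len(current_state):], True
    # No shared prefix: reset the cache and evaluate the full prompt.
    return prompt, False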

llama_cpp/server/model.py

5 additions & 0 deletions

@@ -11,6 +11,7 @@
 from llama_cpp.server.settings import ModelSettings
 from llama_cpp.managers.cache import LlamaCacheManager
 
+
 class LlamaProxy:
     def __init__(self, models: List[ModelSettings]) -> None:
         assert models, "No models provided!"
@@ -134,6 +135,10 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
     else:
         create_fn = LlamaCacheManager
     kwargs["model_path"] = settings.model
+    # kwargs["control_vectors"] = [
+    #     "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/hearts_system/hearts_system.json",
+    #     "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/hearts_system/hearts_system_critical.json"
+    # ]
 
     _model = create_fn(
         **kwargs,
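
If the commented-out block were enabled, it would pass control-vector JSON files to the model constructor through kwargs. A hypothetical sketch with placeholder paths is below; the "control_vectors" kwarg is this fork's addition rather than upstream llama-cpp-python, and _load_control_vector() in llama_cpp/llama.py expects each entry to be an existing .json file.

# Hypothetical sketch with placeholder paths; "control_vectors" is this
# fork's kwarg, and each entry must be an existing .json file.
kwargs["control_vectors"] = [
    "control_vectors/steering.json",
    "control_vectors/steering_critical.json",
]
_model = create_fn(**kwargs)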

scripts/start.sh

1 addition & 1 deletion

@@ -1,5 +1,5 @@
 python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-john-doe-ckpt-158-200.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
 
-python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-instruct-v0.2-v0.0.1.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
+python llama_cpp/server --model "/Users/sengwee.ngui/Library/CloudStorage/OneDrive-TemusPte.Ltd/Documents/projects/SuperAdapters/data/llms/mistral-fwd-instruct-v0.2-v0.0.1.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000 --host 0.0.0.0
 
 python3 llama_cpp/server --model "../data/mistral-fwd-john-doe-ckpt-158-200.gguf" --n_gpu_layers 64 --n_ctx 8192 --n_batch 2048 --last_n_tokens_size 4000
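
Adding --host 0.0.0.0 makes the second server command accept connections from other machines instead of only localhost. A quick smoke test against the OpenAI-compatible completions route that llama_cpp.server exposes might look like the following; SERVER_IP and the default port 8000 are assumptions to adjust for the actual deployment.

# Smoke test from another machine; SERVER_IP and the default port 8000
# are assumptions, adjust them to the actual deployment.
import requests

resp = requests.post(
    "http://SERVER_IP:8000/v1/completions",
    json={"prompt": "Hello", "max_tokens": 16},
    timeout=60,
)
print(resp.json()["choices"][0]["text"])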

0 commit comments
