Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit e6bbfb8

Browse files
authored
examples: fix quantize example (abetlen#1387)
@iyubondyrev thank you!
1 parent c58b561 commit e6bbfb8
Copy full SHA for e6bbfb8

File tree

Expand file tree / Collapse file tree

1 file changed

+8
-5
lines changed
Filter options
Expand file tree / Collapse file tree

1 file changed

+8
-5
lines changed

examples/low_level_api/quantize.py

Copy file name to clipboard | Expand all lines: examples/low_level_api/quantize.py
+8 -5 | Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,16 @@
44

55

66
def main(args):
7+
fname_inp = args.fname_inp.encode("utf-8")
8+
fname_out = args.fname_out.encode("utf-8")
79
if not os.path.exists(fname_inp):
810
raise RuntimeError(f"Input file does not exist ({fname_inp})")
911
if os.path.exists(fname_out):
1012
raise RuntimeError(f"Output file already exists ({fname_out})")
11-
fname_inp = args.fname_inp.encode("utf-8")
12-
fname_out = args.fname_out.encode("utf-8")
13-
itype = args.itype
14-
return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
13+
ftype = args.type
14+
args = llama_cpp.llama_model_quantize_default_params()
15+
args.ftype = ftype
16+
return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, args)
1517
if return_code != 0:
1618
raise RuntimeError("Failed to quantize model")
1719

@@ -20,6 +22,7 @@ def main(args):
2022
parser = argparse.ArgumentParser()
2123
parser.add_argument("fname_inp", type=str, help="Path to input model")
2224
parser.add_argument("fname_out", type=str, help="Path to output model")
23-
parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1)")
25+
parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum")
2426
args = parser.parse_args()
2527
main(args)
28+

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.