2 files changed, +17 −1 lines changed
@@ -2265,6 +2265,22 @@ def llama_set_n_threads(
     ...


+# // Get the number of threads used for generation of a single token.
+# LLAMA_API uint32_t llama_n_threads(struct llama_context * ctx);
+@ctypes_function("llama_n_threads", [llama_context_p_ctypes], ctypes.c_uint32)
+def llama_n_threads(ctx: llama_context_p, /) -> int:
+    """Get the number of threads used for generation of a single token"""
+    ...
+
+
+# // Get the number of threads used for prompt and batch processing (multiple token).
+# LLAMA_API uint32_t llama_n_threads_batch(struct llama_context * ctx);
+@ctypes_function("llama_n_threads_batch", [llama_context_p_ctypes], ctypes.c_uint32)
+def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
+    """Get the number of threads used for prompt and batch processing (multiple token)"""
+    ...
+
+
 # // Set whether to use causal attention or not
 # // If set to true, the model will only attend to the past tokens
 # LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);
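
For reference, a minimal usage sketch of the two new getters: set the thread counts with the existing llama_set_n_threads, then read them back. The model path is hypothetical, error handling is omitted, and the surrounding setup calls assume the low-level ctypes API as of this revision of llama_cpp.py.

    """Sketch: set thread counts, then read them back with the new getters."""
    import llama_cpp

    llama_cpp.llama_backend_init()

    model = llama_cpp.llama_load_model_from_file(
        b"./model.gguf",  # hypothetical path
        llama_cpp.llama_model_default_params(),
    )
    ctx = llama_cpp.llama_new_context_with_model(
        model, llama_cpp.llama_context_default_params()
    )

    # 8 threads for single-token generation, 16 for prompt/batch processing.
    llama_cpp.llama_set_n_threads(ctx, 8, 16)
    assert llama_cpp.llama_n_threads(ctx) == 8
    assert llama_cpp.llama_n_threads_batch(ctx) == 16

    llama_cpp.llama_free(ctx)
    llama_cpp.llama_free_model(model)
    llama_cpp.llama_backend_free()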