Commit d4f8f52

llama : move sampling code into llama-sampling
ggml-ci
1 parent 1666f92

File tree

7 files changed: +757 −699 lines changed

Makefile

+9 −0 (9 additions & 0 deletions)
@@ -868,6 +868,7 @@ OBJ_GGML += \
 
 OBJ_LLAMA = \
 	src/llama.o \
+	src/llama-sampling.o \
 	src/unicode.o \
 	src/unicode-data.o
 
@@ -1047,6 +1048,7 @@ src/unicode-data.o: \
 
 src/llama.o: \
 	src/llama.cpp \
+	src/llama-impl.h \
 	src/unicode.h \
 	include/llama.h \
 	ggml/include/ggml-cuda.h \
@@ -1056,6 +1058,13 @@ src/llama.o: \
 	ggml/include/ggml-backend.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+src/llama-sampling.o: \
+	src/llama-sampling.cpp \
+	src/llama-sampling.h \
+	src/llama-impl.h \
+	include/llama.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
 $(LIB_LLAMA): \
 	$(OBJ_LLAMA) \
 	$(LIB_GGML)

include/llama.h

+6 −6 (6 additions & 6 deletions)
@@ -1081,12 +1081,6 @@ extern "C" {
           llama_token_data_array * candidates,
                            float   temp);
 
-    /// @details Apply constraints from grammar
-    LLAMA_API void llama_sample_grammar(
-            struct llama_context * ctx,
-          llama_token_data_array * candidates,
-      const struct llama_grammar * grammar);
-
     /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
     /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
     /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@@ -1124,6 +1118,12 @@ extern "C" {
             struct llama_context * ctx,
           llama_token_data_array * candidates);
 
+    /// @details Apply constraints from grammar
+    LLAMA_API void llama_sample_grammar(
+            struct llama_context * ctx,
+          llama_token_data_array * candidates,
+      const struct llama_grammar * grammar);
+
     /// @details Accepts the sampled token into the grammar
     LLAMA_API void llama_grammar_accept_token(
             struct llama_context * ctx,
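
For orientation, a minimal usage sketch of the grammar-sampling API being moved here; it is not part of the commit. The ctx/model/grammar setup is assumed, and the trailing parameters of llama_grammar_accept_token (a grammar pointer and the sampled token), which are truncated in the hunk above, are an assumption based on the public header at this revision.

// Hypothetical helper, not from this commit: builds the candidate array,
// applies the grammar constraint, samples greedily, and advances the
// grammar state. Assumes ctx, model, and grammar were created elsewhere
// and that the logits for the current position live at index 0.
#include "llama.h"

#include <vector>

static llama_token sample_with_grammar(
        struct llama_context * ctx,
        const struct llama_model * model,
        struct llama_grammar * grammar) {
    const int n_vocab = llama_n_vocab(model);
    const float * logits = llama_get_logits_ith(ctx, 0);

    // every llama_sample_* call mutates this candidate array in place
    std::vector<llama_token_data> cur;
    cur.reserve(n_vocab);
    for (llama_token id = 0; id < n_vocab; ++id) {
        cur.push_back({ id, logits[id], 0.0f });
    }
    llama_token_data_array candidates = { cur.data(), cur.size(), false };

    // mask (set to -INFINITY) the logits of tokens the grammar forbids
    llama_sample_grammar(ctx, &candidates, grammar);

    // pick the best remaining token, then advance the grammar state so
    // the next sampling step is constrained correctly
    const llama_token tok = llama_sample_token_greedy(ctx, &candidates);
    llama_grammar_accept_token(ctx, grammar, tok);
    return tok;
}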

src/CMakeLists.txt

+1 −0 (1 addition & 0 deletions)
@@ -14,6 +14,7 @@ endif()
 add_library(llama
             ../include/llama.h
             llama.cpp
+            llama-sampling.cpp
             unicode.h
             unicode.cpp
             unicode-data.cpp

src/llama-impl.h

+50 −0 (50 additions & 0 deletions)
@@ -0,0 +1,50 @@
+#pragma once
+
+#define LLAMA_API_INTERNAL
+#include "llama.h"
+
+#include <array>
+#include <set>
+#include <map>
+#include <cstdint>
+#include <random>
+
+#ifdef __has_include
+#if __has_include(<unistd.h>)
+#include <unistd.h>
+#if defined(_POSIX_MAPPED_FILES)
+#include <sys/mman.h>
+#include <fcntl.h>
+#endif
+#if defined(_POSIX_MEMLOCK_RANGE)
+#include <sys/resource.h>
+#endif
+#endif
+#endif
+
+// bump if necessary
+#define LLAMA_MAX_NODES   8192
+#define LLAMA_MAX_LAYERS  256
+#define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
+
+#ifdef __GNUC__
+#ifdef __MINGW32__
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+#else
+#define LLAMA_ATTRIBUTE_FORMAT(...)
+#endif
+
+//
+// logging
+//
+
+LLAMA_ATTRIBUTE_FORMAT(2, 3)
+void llama_log_internal        (ggml_log_level level, const char * format, ...);
+void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
+
+#define LLAMA_LOG_INFO(...)  llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__)
+#define LLAMA_LOG_WARN(...)  llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__)
+#define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__)
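
As a hypothetical illustration (not part of the commit), a translation unit such as the new src/llama-sampling.cpp can include llama-impl.h to reach the internal logging helpers without living inside llama.cpp; the check_candidates helper below is invented for the example.

// llama-impl.h defines LLAMA_API_INTERNAL before including llama.h and
// exposes the LLAMA_LOG_* macros declared above.
#include "llama-impl.h"

static void check_candidates(size_t n_candidates) {
    if (n_candidates == 0) {
        // expands to llama_log_internal(GGML_LOG_LEVEL_WARN, ...);
        // LLAMA_ATTRIBUTE_FORMAT(2, 3) lets GCC/Clang type-check the
        // printf-style arguments of llama_log_internal at compile time
        LLAMA_LOG_WARN("%s: no candidates to sample from\n", __func__);
        return;
    }
    LLAMA_LOG_INFO("%s: sampling from %zu candidates\n", __func__, n_candidates);
}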

0 commit comments