Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 5c64a09

Browse filesBrowse files
authored
k-quants : allow to optionally disable at compile time (abetlen#1734)
* k-quants : put behind optional compile flag LLAMA_K_QUANTS
* build : enable k-quants by default
1 parent 5b57a5b commit 5c64a09
Copy full SHA for 5c64a09

File tree

Expand file treeCollapse file tree

6 files changed

+251
-229
lines changed
Filter options
Expand file treeCollapse file tree

6 files changed

+251
-229
lines changed

‎CMakeLists.txt

Copy file name to clipboardExpand all lines: CMakeLists.txt
+6-2Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kern
7272
set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
7373
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
7474
option(LLAMA_METAL "llama: use Metal" OFF)
75+
option(LLAMA_K_QUANTS "llama: use k-quants" ON)
7576

7677
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
7778
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -226,6 +227,10 @@ if (LLAMA_METAL)
226227
)
227228
endif()
228229

230+
# Optional k-quants support: add the k-quants sources to the ggml library and
# define GGML_USE_K_QUANTS, the same macro the Makefile build passes via
# -DGGML_USE_K_QUANTS, so both build systems enable the same code paths.
if (LLAMA_K_QUANTS)
231+
set(GGML_SOURCES_EXTRA ${GGML_SOURCES_EXTRA} k_quants.c k_quants.h)
add_compile_definitions(GGML_USE_K_QUANTS)
232+
endif()
233+
229234
if (LLAMA_CLBLAST)
230235
find_package(CLBlast)
231236
if (CLBlast_FOUND)
@@ -396,11 +401,10 @@ endif()
396401
add_library(ggml OBJECT
397402
ggml.c
398403
ggml.h
399-
ggml-quants-k.h
400-
ggml-quants-k.c
401404
${GGML_SOURCES_CUDA}
402405
${GGML_SOURCES_OPENCL}
403406
${GGML_SOURCES_METAL}
407+
${GGML_SOURCES_EXTRA}
404408
)
405409

406410
target_include_directories(ggml PUBLIC .)

‎Makefile

Copy file name to clipboardExpand all lines: Makefile
+22-15Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
121121
endif
122122
endif
123123

124+
# k-quants are enabled unless LLAMA_NO_K_QUANTS is defined: pass the
# GGML_USE_K_QUANTS macro to the C compiler and add k_quants.o to OBJS.
ifndef LLAMA_NO_K_QUANTS
125+
CFLAGS += -DGGML_USE_K_QUANTS
126+
OBJS += k_quants.o
127+
endif
128+
124129
ifndef LLAMA_NO_ACCELERATE
125130
# Mac M1 - include Accelerate framework.
126131
# `-framework Accelerate` works on Mac Intel as well, with negligible performance boost (as of the predict time).
@@ -140,7 +145,7 @@ ifdef LLAMA_OPENBLAS
140145
endif # LLAMA_OPENBLAS
141146

142147
ifdef LLAMA_BLIS
143-
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
148+
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
144149
LDFLAGS += -lblis -L/usr/local/lib
145150
endif # LLAMA_BLIS
146151

@@ -212,6 +217,11 @@ ifneq ($(filter armv8%,$(UNAME_M)),)
212217
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
213218
endif
214219

220+
# Explicit rule for k_quants.o. It is only needed when k-quants are enabled,
# i.e. when LLAMA_NO_K_QUANTS is NOT defined and k_quants.o is part of OBJS,
# so the guard must be `ifndef` to match the block that adds k_quants.o.
ifndef LLAMA_NO_K_QUANTS
221+
k_quants.o: k_quants.c k_quants.h
222+
$(CC) $(CFLAGS) -c $< -o $@
223+
endif # LLAMA_NO_K_QUANTS
224+
215225
#
216226
# Print build information
217227
#
@@ -231,10 +241,7 @@ $(info )
231241
# Build library
232242
#
233243

234-
ggml.o: ggml.c ggml.h ggml-cuda.h ggml-quants-k.h
235-
$(CC) $(CFLAGS) -c $< -o $@
236-
237-
ggml-quants-k.o: ggml-quants-k.c ggml-quants-k.h ggml.h ggml-cuda.h
244+
ggml.o: ggml.c ggml.h ggml-cuda.h
238245
$(CC) $(CFLAGS) -c $< -o $@
239246

240247
llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
@@ -243,7 +250,7 @@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h
243250
common.o: examples/common.cpp examples/common.h
244251
$(CXX) $(CXXFLAGS) -c $< -o $@
245252

246-
libllama.so: llama.o ggml.o ggml-quants-k.o $(OBJS)
253+
libllama.so: llama.o ggml.o $(OBJS)
247254
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
248255

249256
clean:
@@ -253,28 +260,28 @@ clean:
253260
# Examples
254261
#
255262

256-
main: examples/main/main.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
263+
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS)
257264
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
258265
@echo
259266
@echo '==== Run ./main -h for help. ===='
260267
@echo
261268

262-
quantize: examples/quantize/quantize.cpp build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS)
269+
quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)
263270
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
264271

265-
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS)
272+
quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS)
266273
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
267274

268-
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
275+
perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
269276
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
270277

271-
embedding: examples/embedding/embedding.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
278+
embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
272279
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
273280

274-
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
281+
save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
275282
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
276283

277-
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS)
284+
server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS)
278285
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)
279286

280287
build-info.h: $(wildcard .git/index) scripts/build-info.sh
@@ -289,11 +296,11 @@ build-info.h: $(wildcard .git/index) scripts/build-info.sh
289296
# Tests
290297
#
291298

292-
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o ggml-quants-k.o $(OBJS)
299+
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
293300
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
294301
./$@
295302

296-
vdot: pocs/vdot/vdot.cpp ggml.o ggml-quants-k.o $(OBJS)
303+
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
297304
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
298305

299306
.PHONY: tests clean

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.