Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 093847a

Browse files
ggerganov authored and tybalex committed
llamafile : tmp disable + build sgemm.o when needed (ggml-org#6716)
* build : sgemm.o only when needed ggml-ci * llamafile : tmp disable due to MoE bug ggml-ci
1 parent 1df3685 commit 093847a
Copy full SHA for 093847a

File tree

Expand file tree / Collapse file tree

2 files changed

+41
-25
lines changed
Filter options
Expand file tree / Collapse file tree

2 files changed

+41
-25
lines changed

‎CMakeLists.txt

Copy file name to clipboard · Expand all lines: CMakeLists.txt
+26 −12 — Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ else()
4343
set(LLAMA_METAL_DEFAULT OFF)
4444
endif()
4545

46+
# TODO: fix this for Android CI
47+
# https://github.com/ggerganov/llama.cpp/pull/6716#issuecomment-2061509191
48+
#if (CMAKE_SYSTEM_NAME MATCHES "ANDROID")
49+
# set(LLAMA_LLAMAFILE_DEFAULT OFF)
50+
#else()
51+
# set(LLAMA_LLAMAFILE_DEFAULT ON)
52+
#endif()
53+
54+
# TODO: temporary disable until MoE is fixed
55+
# https://github.com/ggerganov/llama.cpp/pull/6716
56+
set(LLAMA_LLAMAFILE_DEFAULT OFF)
57+
4658
# general
4759
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
4860
option(LLAMA_STATIC "llama: static link libraries" OFF)
@@ -88,7 +100,7 @@ endif()
88100
# 3rd party libs
89101
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
90102
option(LLAMA_BLAS "llama: use BLAS" OFF)
91-
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ON)
103+
option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT})
92104
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
93105
option(LLAMA_CUDA "llama: use CUDA" OFF)
94106
option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF)
@@ -372,6 +384,9 @@ endif()
372384

373385
if (LLAMA_LLAMAFILE)
374386
add_compile_definitions(GGML_USE_LLAMAFILE)
387+
388+
set(GGML_HEADERS_LLAMAFILE sgemm.h)
389+
set(GGML_SOURCES_LLAMAFILE sgemm.cpp)
375390
endif()
376391

377392
if (LLAMA_QKK_64)
@@ -1157,17 +1172,16 @@ add_library(ggml OBJECT
11571172
ggml-backend.h
11581173
ggml-quants.c
11591174
ggml-quants.h
1160-
sgemm.cpp
1161-
sgemm.h
1162-
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1163-
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1164-
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1165-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1166-
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1167-
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1168-
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1169-
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1170-
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1175+
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
1176+
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
1177+
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1178+
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
1179+
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
1180+
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
1181+
${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE}
1182+
${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN}
1183+
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
1184+
${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE}
11711185
)
11721186

11731187
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})

‎Makefile

Copy file name to clipboard · Expand all lines: Makefile
+15 −13 — Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,6 @@ ifdef LLAMA_DISABLE_LOGS
219219
MK_CPPFLAGS += -DLOG_DISABLE_LOGS
220220
endif # LLAMA_DISABLE_LOGS
221221

222-
# disable ggml.c's use of sgemm.cpp
223-
ifdef LLAMA_NO_LLAMAFILE
224-
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=0
225-
else
226-
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE=1
227-
endif
228-
229222
# warnings
230223
WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
231224
MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
@@ -391,6 +384,15 @@ ifdef LLAMA_OPENBLAS
391384
MK_LDFLAGS += $(shell pkg-config --libs openblas)
392385
endif # LLAMA_OPENBLAS
393386

387+
# TODO: temporary disable until MoE is fixed
388+
# https://github.com/ggerganov/llama.cpp/pull/6716
389+
LLAMA_NO_LLAMAFILE := 1
390+
391+
ifndef LLAMA_NO_LLAMAFILE
392+
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
393+
OBJS += sgemm.o
394+
endif
395+
394396
ifdef LLAMA_BLIS
395397
MK_CPPFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
396398
MK_LDFLAGS += -lblis -L/usr/local/lib
@@ -487,11 +489,9 @@ ggml-cuda/%.o: ggml-cuda/%.cu ggml-cuda/%.cuh ggml.h ggml-common.h ggml-cuda/com
487489

488490
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h ggml.h ggml-backend.h ggml-backend-impl.h ggml-common.h $(wildcard ggml-cuda/*.cuh)
489491
$(NVCC_COMPILE)
490-
491492
endif # LLAMA_CUDA
492493

493494
ifdef LLAMA_CLBLAST
494-
495495
MK_CPPFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags-only-I clblast OpenCL)
496496
MK_CFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
497497
MK_CXXFLAGS += $(shell pkg-config --cflags-only-other clblast OpenCL)
@@ -610,6 +610,11 @@ ggml-mpi.o: ggml-mpi.c ggml-mpi.h
610610
$(CC) $(CFLAGS) -c $< -o $@
611611
endif # LLAMA_MPI
612612

613+
ifndef LLAMA_NO_LLAMAFILE
614+
sgemm.o: sgemm.cpp sgemm.h ggml.h
615+
$(CXX) $(CXXFLAGS) -c $< -o $@
616+
endif
617+
613618
GF_CC := $(CC)
614619
include scripts/get-flags.mk
615620

@@ -683,16 +688,13 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
683688
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
684689
$(CC) $(CFLAGS) -c $< -o $@
685690

686-
sgemm.o: sgemm.cpp sgemm.h ggml.h
687-
$(CXX) $(CXXFLAGS) -c $< -o $@
688-
689691
unicode.o: unicode.cpp unicode.h
690692
$(CXX) $(CXXFLAGS) -c $< -o $@
691693

692694
unicode-data.o: unicode-data.cpp unicode-data.h
693695
$(CXX) $(CXXFLAGS) -c $< -o $@
694696

695-
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o sgemm.o
697+
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o unicode-data.o
696698

697699
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
698700
$(CXX) $(CXXFLAGS) -c $< -o $@

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.