server: init working 1.6

ggml-org · ggerganov · Feb 20, 2024 · Feb 17, 2024 · Feb 17, 2024 · Feb 17, 2024
commit 8354b7d2adfb90a556da459760f7cb1781710784
diff --git a/Makefile b/Makefile
@@ -691,7 +691,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
+server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -5,6 +5,7 @@
 #include "oai.hpp"

 #include "../llava/clip.h"
+#include "../llava/llava.h"

 #include "stb_image.h"

@@ -31,6 +32,14 @@

 using json = nlohmann::json;

+// TODO should be in clip.h?
+struct clip_image_u8 {
+    int nx;
+    int ny;
+
+    std::vector<uint8_t> buf;
+};
+
 struct server_params
 {
    std::string hostname = "127.0.0.1";
@@ -702,11 +711,12 @@ struct llama_server_context
                    slot_image img_sl;
                    img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
                    img_sl.img_data = clip_image_u8_init();
-                    if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
-                    {
-                        LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id);
-                        return false;
-                    }
+                    img_sl.img_data->buf = image_buffer;
+                    // if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
+                    // {
+                    //     LOG_TEE("slot %i - failed to load image [id: %i]\n", slot->id, img_sl.id);
+                    //     return false;
+                    // }
                    LOG_TEE("slot %i - loaded image\n", slot->id);
                    img_sl.request_encode_image = true;
                    slot->images.push_back(img_sl);
@@ -983,43 +993,16 @@ struct llama_server_context
            {
                continue;
            }
-            clip_image_f32_batch img_res_v;
-            img_res_v.size = 0;
-            img_res_v.data = nullptr;
-            if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v))
-            {
-                LOG_TEE("Error processing the given image");
-                clip_free(clp_ctx);
-                clip_image_f32_batch_free(img_res_v);
-                return false;
-            }
-            if (img_res_v.size == 0)
-            {
-                LOG_TEE("Error processing the given image");
-                return false;
-            }
-
-            // note: assumes only one image was returned by clip_image_preprocess
-            clip_image_f32 * img_res = img_res_v.data;

-            img.image_tokens = clip_n_patches(clp_ctx);
-            img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx));
-            if (!img.image_embedding)
-            {
-                LOG_TEE("Unable to allocate memory for image embeddings\n");
-                clip_image_f32_batch_free(img_res_v);
-                clip_free(clp_ctx);
-                return false;
-            }
-            LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id);
-            if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding))
-            {
-                LOG_TEE("Unable to encode image\n");
-                clip_image_f32_batch_free(img_res_v);
+            // TODO call encode_image_with_clip instead?
+            llava_image_embed * embed = llava_image_embed_make_with_bytes(clp_ctx, params.n_threads, img.img_data->buf.data(), img.img_data->buf.size());
+            if (!embed) {
+                LOG_TEE("Error processing the given image");
                return false;
            }

-            clip_image_f32_batch_free(img_res_v);
+            img.image_embedding = embed->embed;
+            img.image_tokens = embed->n_image_pos;

            img.request_encode_image = false;
        }