@@ -2,6 +2,7 @@
 #include "llava.h"
 
 #include "llama.h"
+#include "ggml-cpp.h"
 
 #include <algorithm>
 #include <cerrno>
@@ -209,7 +210,10 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
     struct ggml_tensor *flatten = ggml_view_2d(model.ctx, permuted_cont, clip_n_mmproj_embd(ctx_clip), num_patches_height * num_patches_width * num_patches_per_side * num_patches_per_side, size_ele * clip_n_mmproj_embd(ctx_clip), 0);
     // ggml_tensor_printf(flatten,"flatten",__LINE__,false,false);
     ggml_build_forward_expand(gf, flatten);
-    ggml_graph_compute_with_ctx(model.ctx, gf, 1);
+
+    ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
+    ggml_backend_graph_compute(backend.get(), gf);
+
     struct ggml_tensor* result = ggml_graph_node(gf, -1);
 
     memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
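For reference, below is a minimal standalone sketch of the compute path this patch switches to: the graph is built in a host-allocated `ggml_context` and executed through an explicitly created CPU backend (`ggml_backend_init_by_type()` + `ggml_backend_graph_compute()`), with `ggml_backend_ptr` from `ggml-cpp.h` handling cleanup, in place of the removed `ggml_graph_compute_with_ctx()`. The tensor names and sizes are illustrative, not from the patch.

```cpp
#include "ggml.h"
#include "ggml-backend.h"
#include "ggml-cpp.h"

#include <cstdio>

int main() {
    // Host-allocated context (no_alloc = false), so tensor data lives in host
    // memory that the CPU backend can operate on directly.
    ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ false,
    };
    ggml_context * ctx = ggml_init(params);

    ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    for (int i = 0; i < 4; ++i) {
        ((float *) a->data)[i] = 1.0f + i;
        ((float *) b->data)[i] = 10.0f;
    }

    // Build a one-op graph: sum = a + b.
    ggml_cgraph * gf  = ggml_new_graph(ctx);
    ggml_tensor * sum = ggml_add(ctx, a, b);
    ggml_build_forward_expand(gf, sum);

    // New style: obtain a CPU backend from the registry; ggml_backend_ptr
    // (a unique_ptr alias from ggml-cpp.h) frees it when it goes out of scope.
    ggml_backend_ptr backend { ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, nullptr) };
    if (ggml_backend_graph_compute(backend.get(), gf) != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "graph compute failed\n");
        return 1;
    }

    // Same accessor the patch uses: fetch the last node of the graph.
    ggml_tensor * result = ggml_graph_node(gf, -1);
    for (int i = 0; i < 4; ++i) {
        printf("%f\n", ((float *) result->data)[i]); // 11, 12, 13, 14
    }

    ggml_free(ctx);
    return 0;
}
```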