|
|
@@ -1,5 +1,5 @@
|
|
|
-// Defines CLOCK_MONOTONIC on Linux
|
|
|
-#define _POSIX_C_SOURCE 199309L
|
|
|
+// Defines CLOCK_MONOTONIC and asprintf on Linux
|
|
|
+#define _GNU_SOURCE
|
|
|
|
|
|
#include "ggml.h"
|
|
|
|
|
|
@@ -10,6 +10,7 @@
|
|
|
#endif
|
|
|
|
|
|
#include <assert.h>
|
|
|
+#include <errno.h>
|
|
|
#include <time.h>
|
|
|
#include <math.h>
|
|
|
#include <stdlib.h>
|
|
|
@@ -31,7 +32,6 @@
|
|
|
#else
|
|
|
// ref: https://github.com/ggerganov/whisper.cpp/issues/168
|
|
|
#include <windows.h>
|
|
|
-#include <errno.h>
|
|
|
#endif
|
|
|
|
|
|
typedef volatile LONG atomic_int;
|
|
|
@@ -83,6 +83,17 @@ typedef void* thread_ret_t;
|
|
|
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
|
|
#endif
|
|
|
|
|
|
+#define GGML_MLOCK_SUPPORT 0 // default: mlock code paths stay disabled unless <sys/mman.h> is detected below
|
|
|
+
|
|
|
+#ifdef __has_include // only probe for the header when the compiler provides __has_include
|
|
|
+ #if __has_include(<sys/mman.h>) // header is available on this system
|
|
|
+ #undef GGML_MLOCK_SUPPORT
|
|
|
+ #define GGML_MLOCK_SUPPORT 1 // enables the mlock()/munlock() code guarded by `#if GGML_MLOCK_SUPPORT`
|
|
|
+ #include <sys/mman.h> // NOTE(review): presumably the header that declares mlock()/munlock() - confirm per platform
|
|
|
+ #endif
|
|
|
+#endif
|
|
|
+
|
|
|
+
|
|
|
/*#define GGML_PERF*/
|
|
|
#define GGML_DEBUG 0
|
|
|
#define GGML_GELU_FP16
|
|
|
@@ -2344,6 +2355,7 @@ struct ggml_context {
|
|
|
size_t mem_size;
|
|
|
void * mem_buffer;
|
|
|
bool mem_buffer_owned;
|
|
|
+ bool mem_buffer_mlocked;
|
|
|
|
|
|
int n_objects;
|
|
|
|
|
|
@@ -2619,16 +2631,19 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
|
}
|
|
|
|
|
|
*ctx = (struct ggml_context) {
|
|
|
- /*.mem_size =*/ params.mem_size,
|
|
|
- /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
|
|
|
- /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
|
|
|
- /*.n_objects =*/ 0,
|
|
|
- /*.objects_begin =*/ NULL,
|
|
|
- /*.objects_end =*/ NULL,
|
|
|
- /*.scratch =*/ { 0, 0, NULL, },
|
|
|
- /*.scratch_save =*/ { 0, 0, NULL, },
|
|
|
+ /*.mem_size =*/ params.mem_size,
|
|
|
+ /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
|
|
|
+ /*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
|
|
|
+ /*.mem_buffer_mlocked =*/ false,
|
|
|
+ /*.n_objects =*/ 0,
|
|
|
+ /*.objects_begin =*/ NULL,
|
|
|
+ /*.objects_end =*/ NULL,
|
|
|
+ /*.scratch =*/ { 0, 0, NULL, },
|
|
|
+ /*.scratch_save =*/ { 0, 0, NULL, },
|
|
|
};
|
|
|
|
|
|
+ GGML_ASSERT(ctx->mem_buffer != NULL); // check for allocation failure
|
|
|
+
|
|
|
ggml_assert_aligned(ctx->mem_buffer);
|
|
|
|
|
|
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
|
|
|
@@ -2651,6 +2666,14 @@ void ggml_free(struct ggml_context * ctx) {
|
|
|
GGML_PRINT_DEBUG("%s: context %d with %d objects has been freed. memory used = %zu\n",
|
|
|
__func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
|
|
|
|
|
|
+#if GGML_MLOCK_SUPPORT
|
|
|
+ if (ctx->mem_buffer_mlocked) {
|
|
|
+ if (munlock(ctx->mem_buffer, ctx->mem_size)) {
|
|
|
+ fprintf(stderr, "%s: failed to munlock buffer: %s\n", __func__, strerror(errno));
|
|
|
+ }
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
if (ctx->mem_buffer_owned) {
|
|
|
free(ctx->mem_buffer);
|
|
|
}
|
|
|
@@ -2679,6 +2702,37 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|
|
return result;
|
|
|
}
|
|
|
|
|
|
+bool ggml_mlock_supported(void) { // reports whether this build was compiled with the mlock code paths enabled
|
|
|
+ return GGML_MLOCK_SUPPORT; // compile-time 0/1 macro, converted to bool by the return type
|
|
|
+}
|
|
|
+
|
|
|
+#if GGML_MLOCK_SUPPORT
|
|
|
+#ifdef __APPLE__ // Apple builds: MLOCK_SUGGESTION additionally names the sysctl values to adjust
|
|
|
+ #define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \
|
|
|
+ "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l)."
|
|
|
+#else // every other platform: generic RLIMIT_MLOCK hint only
|
|
|
+ #define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)." // appended to the mlock failure message built in ggml_mlock()
|
|
|
+#endif
|
|
|
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p) { // calls mlock() on ctx->mem_buffer; returns true on success, or false with a message stored in *err_p
|
|
|
+ if (ctx->mem_buffer_mlocked) { // an earlier call already locked the buffer: nothing to do
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ if (mlock(ctx->mem_buffer, ctx->mem_size)) { // a non-zero return is treated as failure
|
|
|
+ int ret = asprintf(err_p, "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION, // asprintf allocates the message; NOTE(review): presumably the caller must free() *err_p - confirm
|
|
|
+ ctx->mem_size, strerror(errno)); // errno still holds the mlock() error: no other call runs in between
|
|
|
+ GGML_ASSERT(ret >= 0); // asserts that asprintf itself succeeded
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+ ctx->mem_buffer_mlocked = true; // recorded so ggml_free() knows to munlock() the buffer
|
|
|
+ return true;
|
|
|
+}
|
|
|
+#else // GGML_MLOCK_SUPPORT
|
|
|
+bool ggml_mlock(struct ggml_context * ctx, char ** err_p) { // fallback used when GGML_MLOCK_SUPPORT is 0: always fails; ctx is not used
|
|
|
+ *err_p = strdup("can't mlock because it's not supported on this system"); // NOTE(review): the strdup() result is not checked, so *err_p may be NULL if allocation fails
|
|
|
+ return false;
|
|
|
+}
|
|
|
+#endif // GGML_MLOCK_SUPPORT
|
|
|
+
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
struct ggml_tensor * ggml_new_tensor_impl(
|