Selaa lähdekoodia

cmake : do not include ./src as public for libllama (#13062)

* cmake : do not include ./src as public for libllama

ggml-ci

* cmake : rework tests

ggml-ci

* llguidance : remove unicode include

ggml-ci

* cmake : make c++17 private

ggml-ci
Georgi Gerganov 8 kuukautta sitten
vanhempi
sitoutus
13b4548877

+ 0 - 2
common/arg.cpp

@@ -994,7 +994,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
         "llama-embedding",
         "llama-eval-callback",
         "llama-export-lora",
-        "llama-gbnf-validator",
         "llama-gen-docs",
         "llama-gguf",
         "llama-gguf-hash",
@@ -1014,7 +1013,6 @@ static void common_params_print_completion(common_params_context & ctx_arg) {
         "llama-perplexity",
         "llama-q8dot",
         "llama-quantize",
-        "llama-quantize-stats",
         "llama-qwen2vl-cli",
         "llama-retrieval",
         "llama-run",

+ 0 - 9
examples/CMakeLists.txt

@@ -21,11 +21,6 @@ else()
     add_subdirectory(embedding)
     add_subdirectory(eval-callback)
 
-    if (NOT WIN32)
-        # disabled on Windows because it uses internal functions not exported with LLAMA_API
-        add_subdirectory(gbnf-validator)
-    endif()
-
     add_subdirectory(gguf-hash)
     add_subdirectory(gguf-split)
     add_subdirectory(gguf)
@@ -58,10 +53,6 @@ else()
         add_subdirectory(convert-llama2c-to-ggml)
         add_subdirectory(cvector-generator)
         add_subdirectory(export-lora)
-        if (NOT WIN32)
-            # disabled on Windows because it uses internal functions not exported with LLAMA_API
-            add_subdirectory(quantize-stats)
-        endif()
         add_subdirectory(llava)
         if (GGML_RPC)
             add_subdirectory(rpc)

+ 0 - 5
examples/gbnf-validator/CMakeLists.txt

@@ -1,5 +0,0 @@
-set(TARGET llama-gbnf-validator)
-add_executable(${TARGET} gbnf-validator.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
-target_compile_features(${TARGET} PRIVATE cxx_std_17)

+ 0 - 6
examples/quantize-stats/CMakeLists.txt

@@ -1,6 +0,0 @@
-set(TARGET llama-quantize-stats)
-add_executable(${TARGET} quantize-stats.cpp)
-install(TARGETS ${TARGET} RUNTIME)
-target_link_libraries(${TARGET} PRIVATE llama build_info ${CMAKE_THREAD_LIBS_INIT})
-target_include_directories(${TARGET} PRIVATE ../../common)
-target_compile_features(${TARGET} PRIVATE cxx_std_17)

+ 1 - 1
grammars/README.md

@@ -112,7 +112,7 @@ You can use GBNF grammars:
 
 - In [llama-server](../examples/server)'s completion endpoints, passed as the `grammar` body field
 - In [llama-cli](../examples/main), passed as the `--grammar` & `--grammar-file` flags
-- With [llama-gbnf-validator](../examples/gbnf-validator) tool, to test them against strings.
+- With [test-gbnf-validator](../tests/test-gbnf-validator.cpp), to test them against strings.
 
 ## JSON Schemas → GBNF
 

+ 3 - 2
src/CMakeLists.txt

@@ -32,8 +32,9 @@ add_library(llama
             unicode.h
             )
 
-target_include_directories(llama PUBLIC . ../include)
-target_compile_features   (llama PUBLIC cxx_std_17) # don't bump
+target_include_directories(llama PRIVATE .)
+target_include_directories(llama PUBLIC ../include)
+target_compile_features   (llama PRIVATE cxx_std_17) # don't bump
 
 target_link_libraries(llama PUBLIC ggml)
 

+ 39 - 30
tests/CMakeLists.txt

@@ -1,5 +1,17 @@
 llama_add_compile_flags()
 
+function(llama_build source)
+    if (DEFINED LLAMA_TEST_NAME)
+        set(TEST_TARGET ${LLAMA_TEST_NAME})
+    else()
+        get_filename_component(TEST_TARGET ${source} NAME_WE)
+    endif()
+
+    add_executable(${TEST_TARGET} ${source})
+    target_link_libraries(${TEST_TARGET} PRIVATE common)
+    install(TARGETS ${TEST_TARGET} RUNTIME)
+endfunction()
+
 function(llama_test target)
     include(CMakeParseArguments)
     set(options)
@@ -36,7 +48,7 @@ endfunction()
 # - LABEL: label for the test (defaults to main)
 # - ARGS: arguments to pass to the test executable
 # - WORKING_DIRECTORY
-function(llama_target_and_test source)
+function(llama_build_and_test source)
     include(CMakeParseArguments)
     set(options)
     set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
@@ -58,6 +70,7 @@ function(llama_target_and_test source)
     add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
     target_link_libraries(${TEST_TARGET} PRIVATE common)
+
     add_test(
         NAME ${TEST_TARGET}
         WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
@@ -68,9 +81,7 @@ function(llama_target_and_test source)
 endfunction()
 
 # build test-tokenizer-0 target once and add many tests
-add_executable(test-tokenizer-0 test-tokenizer-0.cpp)
-target_link_libraries(test-tokenizer-0 PRIVATE common)
-install(TARGETS test-tokenizer-0 RUNTIME)
+llama_build(test-tokenizer-0.cpp)
 
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge          ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bert-bge.gguf)
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-command-r.gguf)
@@ -87,27 +98,27 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact            ARGS ${CMAKE
 llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder         ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
 
 if (LLAMA_LLGUIDANCE)
-    llama_target_and_test(test-grammar-llguidance.cpp ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
+    llama_build_and_test(test-grammar-llguidance.cpp ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-bpe.gguf)
 endif ()
 
 if (NOT WIN32)
     # these tests are disabled on Windows because they use internal functions not exported with LLAMA_API
-    llama_target_and_test(test-sampling.cpp)
-    llama_target_and_test(test-grammar-parser.cpp)
-    llama_target_and_test(test-grammar-integration.cpp)
-    llama_target_and_test(test-llama-grammar.cpp)
-    llama_target_and_test(test-chat.cpp)
+    llama_build_and_test(test-sampling.cpp)
+    llama_build_and_test(test-grammar-parser.cpp)
+    llama_build_and_test(test-grammar-integration.cpp)
+    llama_build_and_test(test-llama-grammar.cpp)
+    llama_build_and_test(test-chat.cpp)
     # TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
     if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
-        llama_target_and_test(test-json-schema-to-grammar.cpp   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+        llama_build_and_test(test-json-schema-to-grammar.cpp   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
         target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)
     endif()
 
+    llama_build(test-quantize-stats.cpp)
+    llama_build(test-gbnf-validator.cpp)
 
     # build test-tokenizer-1-bpe target once and add many tests
-    add_executable(test-tokenizer-1-bpe test-tokenizer-1-bpe.cpp)
-    target_link_libraries(test-tokenizer-1-bpe PRIVATE common)
-    install(TARGETS test-tokenizer-1-bpe RUNTIME)
+    llama_build(test-tokenizer-1-bpe.cpp)
 
     # TODO: disabled due to slowness
     #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila    ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
@@ -120,37 +131,35 @@ if (NOT WIN32)
     #llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
 
     # build test-tokenizer-1-spm target once and add many tests
-    add_executable(test-tokenizer-1-spm test-tokenizer-1-spm.cpp)
-    target_link_libraries(test-tokenizer-1-spm PRIVATE common)
-    install(TARGETS test-tokenizer-1-spm RUNTIME)
+    llama_build(test-tokenizer-1-spm.cpp)
 
     llama_test(test-tokenizer-1-spm  NAME test-tokenizer-1-llama-spm ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama-spm.gguf)
     #llama_test(test-tokenizer-1-spm  NAME test-tokenizer-1-baichuan  ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
 
-    # llama_target_and_test(test-double-float.cpp) # SLOW
+    # llama_build_and_test(test-double-float.cpp) # SLOW
 endif()
 
-llama_target_and_test(test-log.cpp)
-llama_target_and_test(test-chat-template.cpp)
+llama_build_and_test(test-log.cpp)
+llama_build_and_test(test-chat-template.cpp)
 
 # this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
 if (NOT WIN32)
-    llama_target_and_test(test-arg-parser.cpp)
+    llama_build_and_test(test-arg-parser.cpp)
 endif()
 
-# llama_target_and_test(test-opt.cpp) # SLOW
-llama_target_and_test(test-gguf.cpp)
-llama_target_and_test(test-backend-ops.cpp)
+# llama_build_and_test(test-opt.cpp) # SLOW
+llama_build_and_test(test-gguf.cpp)
+llama_build_and_test(test-backend-ops.cpp)
 
-llama_target_and_test(test-model-load-cancel.cpp  LABEL "model")
-llama_target_and_test(test-autorelease.cpp        LABEL "model")
+llama_build_and_test(test-model-load-cancel.cpp  LABEL "model")
+llama_build_and_test(test-autorelease.cpp        LABEL "model")
 
 if (NOT GGML_BACKEND_DL)
     # these tests use the backends directly and cannot be built with dynamic loading
-    llama_target_and_test(test-barrier.cpp)
-    llama_target_and_test(test-quantize-fns.cpp)
-    llama_target_and_test(test-quantize-perf.cpp)
-    llama_target_and_test(test-rope.cpp)
+    llama_build_and_test(test-barrier.cpp)
+    llama_build_and_test(test-quantize-fns.cpp)
+    llama_build_and_test(test-quantize-perf.cpp)
+    llama_build_and_test(test-rope.cpp)
 endif()
 
 

+ 3 - 2
tests/test-chat.cpp

@@ -11,8 +11,9 @@
 #include <string>
 
 #include "chat.h"
-#include "llama-grammar.h"
-#include "unicode.h"
+
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
 
 using json = nlohmann::ordered_json;
 

+ 2 - 2
examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp

@@ -1,5 +1,5 @@
-#include "unicode.h"
-#include "llama-grammar.h"
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
 
 #include <cstdio>
 #include <cstdlib>

+ 3 - 2
tests/test-grammar-integration.cpp

@@ -2,10 +2,11 @@
 #undef NDEBUG
 #endif
 
-#include "unicode.h"
-#include "llama-grammar.h"
 #include "json-schema-to-grammar.h"
 
+#include "../src/unicode.h"
+#include "../src/llama-grammar.h"
+
 #include <cassert>
 #include <string>
 #include <vector>

+ 1 - 2
tests/test-grammar-llguidance.cpp

@@ -2,7 +2,6 @@
 #    undef NDEBUG
 #endif
 
-#include "unicode.h"
 #include "sampling.h"
 
 #include <cassert>
@@ -84,7 +83,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
 
             fprintf(stderr,
                     "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following "
-                    "command:     ./llama-gbnf-validator test-grammar-integration.grammar.gbnf "
+                    "command:     ./test-gbnf-validator test-grammar-integration.grammar.gbnf "
                     "test-grammar-integration.string.txt\n\n");
         } else {
             fprintf(stdout, "✅︎\n");

+ 3 - 1
tests/test-grammar-parser.cpp

@@ -3,7 +3,9 @@
 #endif
 
 #include "llama.h"
-#include "llama-grammar.h"
+
+// TODO: should not include libllama sources
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 

+ 1 - 1
tests/test-json-schema-to-grammar.cpp

@@ -4,7 +4,7 @@
 
 #include "json-schema-to-grammar.h"
 
-#include "llama-grammar.h"
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 #include <fstream>

+ 2 - 1
tests/test-llama-grammar.cpp

@@ -3,7 +3,8 @@
 #endif
 
 #include "llama.h"
-#include "llama-grammar.h"
+
+#include "../src/llama-grammar.h"
 
 #include <cassert>
 #include <stdexcept>

+ 2 - 1
examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp

@@ -1,8 +1,9 @@
 #include "ggml.h"
 #include "llama.h"
-#include "llama-model.h"
 #include "common.h"
 
+#include "../src/llama-model.h"
+
 #include <algorithm>
 #include <cassert>
 #include <cinttypes>

+ 2 - 1
tests/test-tokenizer-1-bpe.cpp

@@ -1,8 +1,9 @@
 #include "llama.h"
 #include "common.h"
-#include "unicode.h"
 #include "console.h"
 
+#include "../src/unicode.h"
+
 #include <cassert>
 #include <codecvt>
 #include <cstdio>

+ 2 - 1
tests/test-tokenizer-1-spm.cpp

@@ -1,8 +1,9 @@
 #include "llama.h"
 #include "common.h"
-#include "unicode.h"
 #include "console.h"
 
+#include "../src/unicode.h"
+
 #include <cassert>
 #include <codecvt>
 #include <cstdio>