|
@@ -568,6 +568,50 @@ function gg_sum_open_llama_7b_v2 {
|
|
|
#gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
|
|
#gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
# bge-small

function gg_run_embd_bge_small {
    # Download the BGE-small (BERT) embedding model, convert it to GGUF,
    # quantize to q8_0, and run the embedding example on both precisions.
    # Relies on globals set by the CI driver: SRC, OUT, ci, CMAKE_EXTRA.
    cd ${SRC}

    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
    # NOTE(review): BGE-small is a WordPiece (BERT) model — the HF repo ships
    # tokenizer.json + vocab.txt and has no sentencepiece tokenizer.model
    # (that URL 404s, unnoticed because `set -e` is only enabled below).
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt

    # relative to the build directory entered below
    path_models="../models-mnt/bge-small"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
    (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

    # convert-hf-to-gguf.py writes ggml-model-f16.gguf into the model dir
    python3 ../convert-hf-to-gguf.py ${path_models}

    model_f16="${path_models}/ggml-model-f16.gguf"
    model_q8_0="${path_models}/ggml-model-q8_0.gguf"

    ./bin/quantize ${model_f16} ${model_q8_0} q8_0

    # log names use the -tg- convention consumed by gg_sum_embd_bge_small
    (time ./bin/embedding --model ${model_f16} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
    (time ./bin/embedding --model ${model_q8_0} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log

    set +e
}
|
|
|
|
|
function gg_sum_embd_bge_small {
    # Emit the CI summary section for the bge-small embedding run,
    # quoting the exit status and both embedding logs.
    local logs="$OUT/${ci}"

    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'BGE Small (BERT):\n'
    gg_printf '- status: %s\n' "$(cat ${logs}.exit)"
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat ${logs}-tg-f16.log)"
    gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat ${logs}-tg-q8_0.log)"
}
|
|
|
## main
|
|
## main
|
|
|
|
|
|
|
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
|
@@ -591,6 +635,8 @@ test $ret -eq 0 && gg_run ctest_debug
|
|
|
test $ret -eq 0 && gg_run ctest_release
|
|
test $ret -eq 0 && gg_run ctest_release
|
|
|
|
|
|
|
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
|
|
|
|
+ test $ret -eq 0 && gg_run embd_bge_small
|
|
|
|
|
+
|
|
|
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
|
|
if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
|
|
|
if [ -z ${GG_BUILD_CUDA} ]; then
|
|
if [ -z ${GG_BUILD_CUDA} ]; then
|
|
|
test $ret -eq 0 && gg_run open_llama_3b_v2
|
|
test $ret -eq 0 && gg_run open_llama_3b_v2
|