chat-persistent.sh

#!/bin/bash

set -euo pipefail

cd "$(dirname "$0")/.." || exit

if [[ -z "${PROMPT_CACHE_FILE+x}" || -z "${CHAT_SAVE_DIR+x}" ]]; then
    echo >&2 "error: PROMPT_CACHE_FILE and CHAT_SAVE_DIR must be provided"
    exit 1
fi
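
# Illustrative invocation (the paths are examples, not defaults):
#
#   PROMPT_CACHE_FILE=./chat.prompt.bin \
#   CHAT_SAVE_DIR=./chat/default \
#   ./examples/chat-persistent.sh
#
# Both are created on first run; rerunning with the same values resumes the
# saved conversation.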

MODEL="${MODEL:-./models/llama-13b/ggml-model-q4_0.gguf}"
PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}"
USER_NAME="${USER_NAME:-User}"
AI_NAME="${AI_NAME:-ChatLLaMa}"
DATE_TIME="$(date +%H:%M)"
DATE_YEAR="$(date +%Y)"

LOG="${CHAT_SAVE_DIR}/main.log"
LOG_BG="${CHAT_SAVE_DIR}/main-bg.log"
CUR_PROMPT_FILE="${CHAT_SAVE_DIR}/current-prompt.txt"
CUR_PROMPT_CACHE="${CHAT_SAVE_DIR}/current-cache.bin"
NEXT_PROMPT_FILE="${CHAT_SAVE_DIR}/next-prompt.txt"
NEXT_PROMPT_CACHE="${CHAT_SAVE_DIR}/next-cache.bin"

SESSION_AND_SAMPLE_PATTERN='main: session file matches [[:digit:]]+ / [[:digit:]]+'\
'|'\
'sampling time =[[:space:]]+[[:digit:]]+.[[:digit:]]+ ms /[[:space:]]+[[:digit:]]+'
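# Roughly the two stderr lines this pattern is meant to match (the wording
# varies between llama.cpp versions; these examples are illustrative only):
#   main: session file matches 1024 / 1024 tokens of prompt
#   sampling time =    45.67 ms /   128 runs
# In both lines, the number after the '/' is a token count.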
SED_DELETE_MESSAGES="/^(${USER_NAME}:|${AI_NAME}:|\\.\\.\\.)/,\$d"
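# Deletes everything from the first chat line (a "User:"/"ChatLLaMa:" message
# or a "..." marker) to the end of the file, keeping only the template header.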

CTX_SIZE=2048
CTX_ROTATE_POINT=$((CTX_SIZE * 3 / 5)) # REVIEW
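# Past ~3/5 of the context, new messages are mirrored into the next prompt
# file (see the main loop below) so the rotated prompt keeps recent history.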
OPTS=(--model "$MODEL" --ctx_size "$CTX_SIZE" --repeat_last_n 256 "$@")

# An unbuffered `tail -c+N`
skip_bytes() {
    LANG=C IFS= read -r -n "$1" -d '' c
    while LANG=C IFS= read -r -n 1 -d '' c; do
        printf '%s' "$c"
    done
}
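
# Reading one byte at a time keeps the output unbuffered, so generated text
# streams to the terminal as it arrives, e.g.:
#   printf 'hello world' | skip_bytes 6   # prints 'world'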

mkdir -p "$CHAT_SAVE_DIR"
echo >"$LOG"
trap "tail -n100 ${LOG}" EXIT

if [[ ! -e "$CUR_PROMPT_FILE" ]]; then
    sed -e "s/\[\[USER_NAME\]\]/${USER_NAME}/g" \
        -e "s/\[\[AI_NAME\]\]/${AI_NAME}/g" \
        -e "s/\[\[DATE_TIME\]\]/${DATE_TIME}/g" \
        -e "s/\[\[DATE_YEAR\]\]/${DATE_YEAR}/g" \
        "$PROMPT_TEMPLATE" >"$CUR_PROMPT_FILE"
fi

if [[ ! -e "$NEXT_PROMPT_FILE" ]]; then
    sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
fi

if [[ "$(tail -c4 "$NEXT_PROMPT_FILE")" != "..." ]]; then
    echo '...' >>"$NEXT_PROMPT_FILE"
fi
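# The '...' sentinel marks elided history; SED_DELETE_MESSAGES also matches
# it, so it is stripped again when the next rotation rebuilds this file.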

if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then
    echo 'Prompt cache does not exist, building...'
    # Default batch_size to 64 here for better user feedback during initial prompt processing
    ./llama-cli 2>>"$LOG" \
        --batch_size 64 \
        "${OPTS[@]}" \
        --prompt-cache "$PROMPT_CACHE_FILE" \
        --file "$CUR_PROMPT_FILE" \
        --n_predict 1
    echo
    echo 'Done!'
fi
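# The single predicted token above is throwaway; the run exists for its side
# effect of evaluating the full prompt and saving the resulting KV state to
# $PROMPT_CACHE_FILE, so later runs can skip re-evaluating this shared prefix.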

if [[ ! -e "$CUR_PROMPT_CACHE" ]]; then
    cp "$PROMPT_CACHE_FILE" "$CUR_PROMPT_CACHE"
fi
if [[ ! -e "$NEXT_PROMPT_CACHE" ]]; then
    cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"
fi

printf '%s ' "$(< "$CUR_PROMPT_FILE")"
n_tokens=0

while read -e line; do
    # Limit generation to remaining context, with a buffer and estimating 2 chars/token for input
    n_predict=$((CTX_SIZE - n_tokens - ${#line} / 2 - 32))
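    # Worked example: with 1800 of 2048 context tokens used and a 100-char
    # input line, n_predict = 2048 - 1800 - 100/2 - 32 = 166 tokens.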

    # Swap prompts when we're about to run out of context
    if ((n_predict <= 0)); then
        wait # for background main (below) to finish with next prompt
        mv "$NEXT_PROMPT_FILE" "$CUR_PROMPT_FILE"
        mv "$NEXT_PROMPT_CACHE" "$CUR_PROMPT_CACHE"
        sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
        echo '...' >>"$NEXT_PROMPT_FILE"
        cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"
        n_tokens=0
        n_predict=$((CTX_SIZE / 2))
    fi

    echo " ${line}" >>"$CUR_PROMPT_FILE"
    if ((n_tokens > CTX_ROTATE_POINT)); then
        echo " ${line}" >>"$NEXT_PROMPT_FILE"
    fi

    n_prompt_len_pre=$(($(wc -c <"$CUR_PROMPT_FILE")))
    printf '%s: ' "$AI_NAME" >>"$CUR_PROMPT_FILE"

    ./llama-cli 2>>"$LOG" "${OPTS[@]}" \
        --prompt-cache "$CUR_PROMPT_CACHE" \
        --prompt-cache-all \
        --file "$CUR_PROMPT_FILE" \
        --reverse-prompt "${USER_NAME}:" \
        --n_predict "$n_predict" |
        skip_bytes 1 |                 # skip BOS token added by ./llama-cli
        tee "$CUR_PROMPT_FILE.tmp" |   # save prompt + generation to tmp file
        skip_bytes "$n_prompt_len_pre" # print generation

    mv "$CUR_PROMPT_FILE.tmp" "$CUR_PROMPT_FILE"

    # if we hit n_predict instead of reverse-prompt, we need to add the prompt
    if [[ "$(tail -n1 "$CUR_PROMPT_FILE")" != "${USER_NAME}:" ]]; then
        printf '\n%s:' "$USER_NAME"
        printf '\n%s:' "$USER_NAME" >>"$CUR_PROMPT_FILE"
    fi
    printf ' '

    if ! session_and_sample_msg=$(tail -n30 "$LOG" | grep -oE "$SESSION_AND_SAMPLE_PATTERN"); then
        echo >&2 "Couldn't get number of tokens from ./llama-cli output!"
        exit 1
    fi
    n_tokens=$(awk '{sum+=$1} END {print sum}' <<<"$(cut -d/ -f2 <<<"$session_and_sample_msg")")
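    # cut keeps the text after each '/' (a token count per matched line); awk
    # sums their first fields: tokens reused from the session file plus newly
    # sampled tokens, i.e. the total context consumed so far.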

    if ((n_tokens > CTX_ROTATE_POINT)); then
        tail -c+$((n_prompt_len_pre + 1)) "$CUR_PROMPT_FILE" >>"$NEXT_PROMPT_FILE"
    fi

    # Update cache for next prompt in background, ideally during user input
    ./llama-cli >>"$LOG_BG" 2>&1 "${OPTS[@]}" \
        --prompt-cache "$NEXT_PROMPT_CACHE" \
        --file "$NEXT_PROMPT_FILE" \
        --n_predict 1 &
done