1
0

chat-persistent.sh 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. #!/bin/bash
  # Abort on command failure, on use of unset variables, and on a failure in
  # any stage of a pipeline.
  set -euo pipefail

  # Work from the parent of the directory containing this script so that the
  # relative paths used below (./main, ./models, ./prompts) resolve.
  cd "$(dirname "$0")/.." || exit

  # Required environment:
  #   PROMPT_CACHE_FILE  prompt cache built from the initial prompt; it is
  #                      handed to ./main via --prompt-cache and copied to
  #                      seed the per-session caches.
  #   CHAT_SAVE_DIR      directory for the logs and the current/next prompt
  #                      and cache files.
  # Empty values are rejected along with unset ones: an empty CHAT_SAVE_DIR
  # would turn the paths derived from it into "/main.log" and friends.
  if [[ -z "${PROMPT_CACHE_FILE:-}" || -z "${CHAT_SAVE_DIR:-}" ]]; then
    echo >&2 "error: PROMPT_CACHE_FILE and CHAT_SAVE_DIR must be provided"
    exit 1
  fi

  # Optional environment, with defaults.
  MODEL="${MODEL:-./models/llama-13b/ggml-model-q4_0.gguf}"
  PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}"
  USER_NAME="${USER_NAME:-User}"
  AI_NAME="${AI_NAME:-ChatLLaMa}"

  # Values substituted for [[DATE_TIME]] / [[DATE_YEAR]] in the template.
  DATE_TIME="$(date +%H:%M)"
  DATE_YEAR="$(date +%Y)"

  # Files kept inside the save directory.
  LOG="${CHAT_SAVE_DIR}/main.log"
  LOG_BG="${CHAT_SAVE_DIR}/main-bg.log"
  CUR_PROMPT_FILE="${CHAT_SAVE_DIR}/current-prompt.txt"
  CUR_PROMPT_CACHE="${CHAT_SAVE_DIR}/current-cache.bin"
  NEXT_PROMPT_FILE="${CHAT_SAVE_DIR}/next-prompt.txt"
  NEXT_PROMPT_CACHE="${CHAT_SAVE_DIR}/next-cache.bin"

  # EREs (grep -oE) for the two ./main log messages the token count is
  # scraped from. The decimal point is escaped so it only matches a literal
  # '.' rather than any character.
  SESSION_SIZE_MSG_PATTERN='main: session file matches [[:digit:]]+ / [[:digit:]]+'
  SAMPLE_TIME_MSG_PATTERN='sample time =[[:space:]]+[[:digit:]]+\.[[:digit:]]+ ms /[[:space:]]+[[:digit:]]+'

  # Print $1 with ERE metacharacters and '/' backslash-escaped, so it can be
  # embedded literally inside a slash-delimited `sed -r` address.
  escape_ere() {
    printf '%s' "$1" | sed -e 's,[[\\/.*^$(){}?+|],\\&,g'
  }

  # sed -r script: delete from the first line starting with "<user>:",
  # "<ai>:" or "..." through the end of the file. The names are escaped so
  # that characters such as '.', '(' or '/' in a name are matched literally
  # instead of being read as regex syntax or as the end of the address.
  SED_DELETE_MESSAGES="/^($(escape_ere "$USER_NAME"):|$(escape_ere "$AI_NAME"):|\\.\\.\\.)/,\$d"

  CTX_SIZE=2048
  # Token count above which new messages are also appended to the next prompt.
  CTX_ROTATE_POINT=$((CTX_SIZE * 3 / 5)) # REVIEW

  # Options common to every ./main invocation; extra script arguments are
  # passed through unchanged.
  OPTS=(--model "$MODEL" --ctx_size "$CTX_SIZE" --repeat_last_n 256 "$@")
  # An unbuffered take on `tail -c+N`: discard the first $1 bytes of stdin,
  # then copy the remainder to stdout one byte at a time so that output shows
  # up as soon as it is produced.
  #
  # Arguments: $1 - number of leading bytes to drop
  # Outputs:   the remaining input on stdout (NUL bytes are not reproduced,
  #            because NUL is used as the `read` delimiter)
  #
  # LC_ALL=C (rather than LANG=C) forces byte-wise reads even when the caller's
  # environment sets LC_ALL or LC_CTYPE to a multibyte locale; those take
  # precedence over LANG, and `read -n` would then count characters while the
  # caller supplies a byte count.
  skip_bytes() {
    local c
    LC_ALL=C IFS= read -r -n "$1" -d '' c
    while LC_ALL=C IFS= read -r -n 1 -d '' c; do
      printf '%s' "$c"
    done
  }
  # Make sure the save directory exists and start this run with a fresh log.
  mkdir -p "$CHAT_SAVE_DIR"
  echo >"$LOG"

  # Whenever the script exits, show the end of the log. Single quotes defer
  # the expansion until the trap runs and keep the path quoted, so a save
  # directory containing spaces or glob characters is handled correctly.
  trap 'tail -n100 "$LOG"' EXIT

  # Print $1 escaped for use as the replacement text of a slash-delimited sed
  # `s` command ('\', '/' and '&' are special there).
  escape_sed_replacement() {
    printf '%s' "$1" | sed -e 's,[\\/&],\\&,g'
  }

  # First run: instantiate the current prompt from the template by filling in
  # the [[...]] placeholders.
  if [[ ! -e "$CUR_PROMPT_FILE" ]]; then
    user_name_repl="$(escape_sed_replacement "$USER_NAME")"
    ai_name_repl="$(escape_sed_replacement "$AI_NAME")"
    sed -e "s/\[\[USER_NAME\]\]/${user_name_repl}/g" \
      -e "s/\[\[AI_NAME\]\]/${ai_name_repl}/g" \
      -e "s/\[\[DATE_TIME\]\]/${DATE_TIME}/g" \
      -e "s/\[\[DATE_YEAR\]\]/${DATE_YEAR}/g" \
      "$PROMPT_TEMPLATE" >"$CUR_PROMPT_FILE"
  fi

  # The next prompt starts as the current prompt with everything from the
  # first message line onwards removed...
  if [[ ! -e "$NEXT_PROMPT_FILE" ]]; then
    sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
  fi

  # ...followed by a "..." line, added only if the file does not already end
  # with one.
  if [[ "$(tail -c4 "$NEXT_PROMPT_FILE")" != "..." ]]; then
    echo '...' >>"$NEXT_PROMPT_FILE"
  fi

  # Build the base prompt cache once by running the current prompt through
  # ./main and generating a single token.
  if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then
    echo 'Prompt cache does not exist, building...'
    # Default batch_size to 64 here for better user feedback during initial prompt processing
    ./main 2>>"$LOG" \
      --batch_size 64 \
      "${OPTS[@]}" \
      --prompt-cache "$PROMPT_CACHE_FILE" \
      --file "$CUR_PROMPT_FILE" \
      --n_predict 1
    echo
    echo 'Done!'
  fi

  # Seed the per-session caches from the base cache when they are missing.
  if [[ ! -e "$CUR_PROMPT_CACHE" ]]; then
    cp "$PROMPT_CACHE_FILE" "$CUR_PROMPT_CACHE"
  fi
  if [[ ! -e "$NEXT_PROMPT_CACHE" ]]; then
    cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"
  fi

  # Show the conversation so far, followed by a space for the user's input.
  printf '%s ' "$(< "$CUR_PROMPT_FILE")"
  # Main chat loop: read one line of user input per iteration, let ./main
  # generate the reply, and keep the prompt files and caches up to date.
  # n_tokens holds the token count scraped from the log after the latest
  # generation.
  n_tokens=0
  # -r keeps backslashes in the user's input intact instead of letting `read`
  # treat them as escape characters.
  while read -r -e line; do
    # Limit generation to remaining context, with a buffer and estimating 2 chars/token for input
    n_predict=$((CTX_SIZE - n_tokens - ${#line} / 2 - 32))

    # Swap prompts when we're about to run out of context
    if ((n_predict <= 0)); then
      wait # for background main (below) to finish with next prompt
      mv "$NEXT_PROMPT_FILE" "$CUR_PROMPT_FILE"
      mv "$NEXT_PROMPT_CACHE" "$CUR_PROMPT_CACHE"

      # Start a new "next" prompt: the message-free head of the prompt plus
      # a "..." line, with a fresh copy of the base cache.
      sed -r "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
      echo '...' >>"$NEXT_PROMPT_FILE"
      cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"

      n_tokens=0
      n_predict=$((CTX_SIZE / 2))
    fi

    # Record the user's message; printf writes it verbatim regardless of
    # what characters it contains.
    printf ' %s\n' "$line" >>"$CUR_PROMPT_FILE"
    if ((n_tokens > CTX_ROTATE_POINT)); then
      printf ' %s\n' "$line" >>"$NEXT_PROMPT_FILE"
    fi

    # Byte length of the prompt before the AI name is appended; everything
    # past this offset in ./main's output is what gets shown to the user.
    n_prompt_len_pre=$(($(wc -c <"$CUR_PROMPT_FILE")))

    printf '%s: ' "$AI_NAME" >>"$CUR_PROMPT_FILE"

    ./main 2>>"$LOG" "${OPTS[@]}" \
      --prompt-cache "$CUR_PROMPT_CACHE" \
      --prompt-cache-all \
      --file "$CUR_PROMPT_FILE" \
      --reverse-prompt "${USER_NAME}:" \
      --n_predict "$n_predict" |
      skip_bytes 1 |                 # skip BOS token added by ./main
      tee "$CUR_PROMPT_FILE.tmp" |   # save prompt + generation to tmp file
      skip_bytes "$n_prompt_len_pre" # print generation

    mv "$CUR_PROMPT_FILE.tmp" "$CUR_PROMPT_FILE"

    # if we hit n_predict instead of reverse-prompt, we need to add the prompt
    if [[ "$(tail -n1 "$CUR_PROMPT_FILE")" != "${USER_NAME}:" ]]; then
      printf '\n%s:' "$USER_NAME"
      printf '\n%s:' "$USER_NAME" >>"$CUR_PROMPT_FILE"
    fi
    printf ' '

    # HACK get num tokens from debug message
    # TODO get both messages in one go
    # Only the last match in each window is used, so an extra matching line
    # inside the tail window cannot turn the arithmetic below into a syntax
    # error. `|| true` keeps a failed grep from aborting under `set -e`; a
    # missing message is detected by the emptiness check instead.
    session_size_msg="$(tail -n30 "$LOG" | grep -oE "$SESSION_SIZE_MSG_PATTERN" | tail -n1)" || true
    sample_time_msg="$(tail -n10 "$LOG" | grep -oE "$SAMPLE_TIME_MSG_PATTERN" | tail -n1)" || true
    if [[ -z "$session_size_msg" || -z "$sample_time_msg" ]]; then
      echo >&2 "Couldn't get number of tokens from ./main output!"
      exit 1
    fi

    # Both messages end in "/ <number>"; the token count is the sum of the
    # two numbers after the slash.
    n_tokens=$((${session_size_msg##*/} + ${sample_time_msg##*/}))

    # Past the rotate point, mirror the newest reply into the next prompt.
    if ((n_tokens > CTX_ROTATE_POINT)); then
      tail -c+$((n_prompt_len_pre + 1)) "$CUR_PROMPT_FILE" >>"$NEXT_PROMPT_FILE"
    fi

    # Update cache for next prompt in background, ideally during user input
    ./main >>"$LOG_BG" 2>&1 "${OPTS[@]}" \
      --prompt-cache "$NEXT_PROMPT_CACHE" \
      --file "$NEXT_PROMPT_FILE" \
      --n_predict 1 &
  done