  1. " Requires an already running llama.cpp server
  2. " To install either copy or symlink to ~/.vim/autoload/llama.vim
  3. " Then start with either :call llama#doLlamaGen(),
  4. " or add a keybind to your vimrc such as
  5. " nnoremap Z :call llama#doLlamaGen()<CR>
  6. " Similarly, you could add an insert mode keybind with
  7. " inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
  8. "
  9. " g:llama_api_url and g:llama_overrides can be configured in your .vimrc
  10. " let g:llama_api_url = "192.168.1.10:8080"
  11. " llama_overrides can also be set through buffer/window scopes. For instance
  12. " autocmd filetype python let b:llama_overrides = {"temp": 0.2}
  13. " Could be added to your .vimrc to automatically set a lower temperature when
  14. " editing a python script
  15. " Additionally, an override dict can be stored at the top of a file
  16. " !*{"stop": ["User:"]}
  17. " Could be added to the start of your chatlog.txt to set the stopping token
  18. " These parameter dicts are merged together from lowest to highest priority:
  19. " server default -> g:llama_overrides -> w:llama_overrides ->
  20. " b:llama_overrides -> in file (!*) overrides
  21. "
  22. " Sublists (like logit_bias and stop) are overridden, not merged
  23. " Example override:
  24. " !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
if !exists("g:llama_api_url")
  let g:llama_api_url = "127.0.0.1:8080"
endif
if !exists("g:llama_overrides")
  let g:llama_overrides = {}
endif
const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
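" Template curl invocation: the functions below copy this list and patch
" index 2 (the --data-raw JSON payload) and, for tokenization, index 8
" (the --url value) before handing it to job_start()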
const s:curlcommand = ['curl','--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
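" Maps a buffer number to the line the streaming callback is currently
" appending generated text to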
let s:linedict = {}
func s:callbackHandler(bufn, channel, msg)
  " Ignore empty or keep-alive lines from the stream
  if len(a:msg) < 3
    return
  elseif a:msg[0] == "d"
    " Streamed responses arrive as server-sent events; strip the "data: " prefix
    let l:msg = a:msg[6:-1]
  else
    let l:msg = a:msg
  endif
  let l:decoded_msg = json_decode(l:msg)
  let l:newtext = split(l:decoded_msg['content'], "\n", 1)
  if len(l:newtext) > 0
    " Append the first chunk to the line currently being generated
    call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. l:newtext[0])
  else
    echo "nothing genned"
  endif
  if len(l:newtext) > 1
    " Any remaining chunks become new lines below it
    let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], l:newtext[1:-1])
    let s:linedict[a:bufn] = s:linedict[a:bufn] + len(l:newtext) - 1
  endif
  if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
    echo "Finished generation"
  endif
endfunction
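" Sends the buffer (up to the first 1000 lines) as the prompt and streams
" the completion into the end of the buffer; calling it again while a
" generation job is running stops that job instead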
func llama#doLlamaGen()
  if exists("b:job")
    if job_status(b:job) == "run"
      call job_stop(b:job)
      return
    endif
  endif
  let l:cbuffer = bufnr("%")
  let s:linedict[l:cbuffer] = line('$')
  let l:buflines = getbufline(l:cbuffer, 1, 1000)
  " Merge the override dicts from lowest to highest priority
  let l:querydata = copy(s:querydata)
  call extend(l:querydata, g:llama_overrides)
  if exists("w:llama_overrides")
    call extend(l:querydata, w:llama_overrides)
  endif
  if exists("b:llama_overrides")
    call extend(l:querydata, b:llama_overrides)
  endif
  " An in-file override dict on the first line (!*{...}) is applied last
  " and excluded from the prompt
  if l:buflines[0][0:1] == '!*'
    let l:userdata = json_decode(l:buflines[0][2:-1])
    call extend(l:querydata, l:userdata)
    let l:buflines = l:buflines[1:-1]
  endif
  let l:querydata.prompt = join(l:buflines, "\n")
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
endfunction
  85. " Echos the tokkenization of the provided string , or cursor to end of word
  86. " Onus is placed on the user to include the preceding space
func llama#tokenizeWord(...)
  if (a:0 > 0)
    let l:input = a:1
  else
    " No argument: yank from the cursor to the end of the word into the * register
    exe "normal \"*ye"
    let l:input = @*
  endif
  let l:querydata = {"content": l:input}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  " Point the request at the tokenize endpoint instead of /completion
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
endfunction
func s:tokenizeWordCallback(plaintext, channel, msg)
  echo '"' .. a:plaintext .. '" - ' .. string(json_decode(a:msg).tokens)
endfunction
  103. " Echos the token count of the entire buffer (or provided string)
  104. " Example usage :echo llama#tokenCount()
func llama#tokenCount(...)
  if (a:0 > 0)
    let l:buflines = a:1
  else
    let l:buflines = getline(1, 1000)
    " Exclude an in-file override line from the count
    if l:buflines[0][0:1] == '!*'
      let l:buflines = l:buflines[1:-1]
    endif
    let l:buflines = join(l:buflines, "\n")
  endif
  let l:querydata = {"content": l:buflines}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
endfunction
func s:tokenCountCallback(channel, msg)
  let l:resp = json_decode(a:msg)
  echo len(l:resp.tokens)
endfunction