embd_input.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
import codecs
import ctypes
import os
from ctypes import cdll, c_char_p, c_void_p, POINTER, c_float, c_int

import numpy as np
  5. libc = cdll.LoadLibrary("./libembdinput.so")
  6. libc.sampling.restype=c_char_p
  7. libc.create_mymodel.restype=c_void_p
  8. libc.eval_string.argtypes=[c_void_p, c_char_p]
  9. libc.sampling.argtypes=[c_void_p]
  10. libc.eval_float.argtypes=[c_void_p, POINTER(c_float), c_int]
  11. class MyModel:
  12. def __init__(self, args):
  13. argc = len(args)
  14. c_str = [c_char_p(i.encode()) for i in args]
  15. args_c = (c_char_p * argc)(*c_str)
  16. self.model = c_void_p(libc.create_mymodel(argc, args_c))
  17. self.max_tgt_len = 512
  18. self.print_string_eval = True
  19. def __del__(self):
  20. libc.free_mymodel(self.model)
  21. def eval_float(self, x):
  22. libc.eval_float(self.model, x.astype(np.float32).ctypes.data_as(POINTER(c_float)), x.shape[1])
  23. def eval_string(self, x):
  24. libc.eval_string(self.model, x.encode()) # c_char_p(x.encode()))
  25. if self.print_string_eval:
  26. print(x)
  27. def eval_token(self, x):
  28. libc.eval_id(self.model, x)
  29. def sampling(self):
  30. s = libc.sampling(self.model)
  31. return s
  32. def stream_generate(self, end="</s>"):
  33. ret = b""
  34. end = end.encode()
  35. for _ in range(self.max_tgt_len):
  36. tmp = self.sampling()
  37. ret += tmp
  38. yield tmp
  39. if ret.endswith(end):
  40. break
  41. def generate_with_print(self, end="</s>"):
  42. ret = b""
  43. for i in self.stream_generate(end=end):
  44. ret += i
  45. print(i.decode(errors="replace"), end="", flush=True)
  46. print("")
  47. return ret.decode(errors="replace")
  48. def generate(self, end="</s>"):
  49. text = b"".join(self.stream_generate(end=end))
  50. return text.decode(errors="replace")
  51. if __name__ == "__main__":
  52. model = MyModel(["main", "--model", "../llama.cpp/models/ggml-vic13b-q4_1.bin", "-c", "2048"])
  53. model.eval_string("""user: what is the color of the flag of UN?""")
  54. x = np.random.random((5120,10))# , dtype=np.float32)
  55. model.eval_float(x)
  56. model.eval_string("""assistant:""")
  57. for i in model.generate():
  58. print(i.decode(errors="replace"), end="", flush=True)