// convert.cuh

  // Guard against this header being processed more than once per translation unit.
  #pragma once

  #include "common.cuh"

  // Compile-time constant with the value 256. Nothing in this header uses it;
  // the consumers live elsewhere.
  // NOTE(review): going by the name, presumably the thread-block size used when
  // launching dequantize kernels — confirm at the use sites.
  #define CUDA_DEQUANTIZE_BLOCK_SIZE 256
  // Function-pointer type, parameterized on the output element type T.
  //
  // A function matching this type returns nothing and receives:
  //   x      - read-only, untyped pointer (the concrete input type is not
  //            expressed in the signature)
  //   y      - pointer to T
  //   k      - 64-bit signed integer
  //   stream - CUDA stream handle
  //
  // NOTE(review): presumably such a function converts k elements read from x
  // into y and enqueues the work on `stream`; the implementations are not in
  // this header — confirm.
  template <typename T>
  using to_t_cuda_t = void (*)(const void * x, T * y, int64_t k, cudaStream_t stream);
  // Concrete instantiations of to_t_cuda_t, one per output element type
  // named in this header: float, half and nv_bfloat16.
  using to_fp32_cuda_t = to_t_cuda_t<float>;
  using to_fp16_cuda_t = to_t_cuda_t<half>;
  using to_bf16_cuda_t = to_t_cuda_t<nv_bfloat16>;
  // Lookup functions: each takes a ggml_type and returns a function pointer of
  // the matching to_*_cuda_t type. Only the prototypes are declared here.
  // NOTE(review): the result for a ggml_type with no available conversion is
  // not visible from this header (possibly nullptr) — check the definitions
  // before calling through the returned pointer unconditionally.

  // Returns a function pointer whose output element type is half.
  to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);

  // Returns a function pointer whose output element type is nv_bfloat16.
  to_bf16_cuda_t ggml_get_to_bf16_cuda(ggml_type type);

  // Returns a function pointer whose output element type is float.
  to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
  // TODO more general support for non-contiguous inputs

  // Function-pointer type, parameterized on the output element type T, with
  // extra per-dimension arguments compared to to_t_cuda_t.
  //
  // A function matching this type returns nothing and receives:
  //   x                      - read-only, untyped pointer
  //   y                      - pointer to T
  //   ne00, ne01, ne02, ne03 - four 64-bit signed integers
  //   s01, s02, s03          - three 64-bit signed integers
  //   stream                 - CUDA stream handle
  //
  // NOTE(review): presumably ne0x are the extents of the four input dimensions
  // and s0x the strides of dimensions 1..3 of a non-contiguous input; whether
  // the strides are in elements or bytes is not visible here — confirm
  // against the implementations.
  template <typename T>
  using to_t_nc_cuda_t = void (*)(const void * x, T * y,
      int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03,
      int64_t s01, int64_t s02, int64_t s03, cudaStream_t stream);
  // to_t_nc_cuda_t instantiated for half output; the only instantiation
  // declared in this header.
  using to_fp16_nc_cuda_t = to_t_nc_cuda_t<half>;

  // Takes a ggml_type and returns a to_fp16_nc_cuda_t function pointer.
  // Only the prototype is declared here.
  // NOTE(review): the result for a ggml_type with no available conversion is
  // not visible from this header (possibly nullptr) — confirm at the definition.
  to_fp16_nc_cuda_t ggml_get_to_fp16_nc_cuda(ggml_type type);