cturan/llama.cpp @ 5ed087573e1f326cfa70e29c1895d074a7a1a00c

Georgi Gerganov e079bffb66 cuda : fix FA Q src index (1 -> 0) (#9374)		1 vuosi sitten
..
template-instances	69c487f4ed CUDA: MMQ code deduplication + iquant support (#8495)	1 vuosi sitten
vendors	439b3fc75a cuda : organize vendor-specific headers into vendors directory (#8746)	1 vuosi sitten
acc.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
acc.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
arange.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
arange.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
argsort.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	1 vuosi sitten
argsort.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
binbcast.cu	231cff5f6f sync : ggml	1 vuosi sitten
binbcast.cuh	231cff5f6f sync : ggml	1 vuosi sitten
clamp.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
clamp.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
common.cuh	439b3fc75a cuda : organize vendor-specific headers into vendors directory (#8746)	1 vuosi sitten
concat.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
concat.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
conv-transpose-1d.cu	fde13b3bb9 feat: cuda implementation for `ggml_conv_transpose_1d` (ggml/854)	1 vuosi sitten
conv-transpose-1d.cuh	fde13b3bb9 feat: cuda implementation for `ggml_conv_transpose_1d` (ggml/854)	1 vuosi sitten
convert.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
convert.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
cpy.cu	4db04784f9 cuda : fix defrag with quantized KV (#9319)	1 vuosi sitten
cpy.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
cross-entropy-loss.cu	202084d31d tests: add gradient tests for all backends (ggml/932)	1 vuosi sitten
cross-entropy-loss.cuh	231cff5f6f sync : ggml	1 vuosi sitten
dequantize.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
diagmask.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
diagmask.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
dmmv.cu	7a11eb3a26 cuda : fix dmmv cols requirement to 2*GGML_CUDA_DMMV_X (#8800)	1 vuosi sitten
dmmv.cuh	7a11eb3a26 cuda : fix dmmv cols requirement to 2*GGML_CUDA_DMMV_X (#8800)	1 vuosi sitten
fattn-common.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	1 vuosi sitten
fattn-tile-f16.cu	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	1 vuosi sitten
fattn-tile-f16.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
fattn-tile-f32.cu	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	1 vuosi sitten
fattn-tile-f32.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
fattn-vec-f16.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	1 vuosi sitten
fattn-vec-f32.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	1 vuosi sitten
fattn-wmma-f16.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	1 vuosi sitten
fattn.cu	e079bffb66 cuda : fix FA Q src index (1 -> 0) (#9374)	1 vuosi sitten
fattn.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
getrows.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	1 vuosi sitten
getrows.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
im2col.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
im2col.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
mma.cuh	808aba3916 CUDA: optimize and refactor MMQ (#8416)	1 vuosi sitten
mmq.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	1 vuosi sitten
mmq.cuh	2b1f616b20 ggml : reduce hash table reset cost (#8698)	1 vuosi sitten
mmvq.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	1 vuosi sitten
mmvq.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
norm.cu	2d5dd7bb3f ggml : add epsilon as a parameter for group_norm (#8818)	1 vuosi sitten
norm.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
pad.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
pad.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
pool2d.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
pool2d.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
quantize.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	1 vuosi sitten
quantize.cuh	808aba3916 CUDA: optimize and refactor MMQ (#8416)	1 vuosi sitten
rope.cu	06943a69f6 ggml : move rope type enum to ggml.h (#8949)	1 vuosi sitten
rope.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
scale.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
scale.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
softmax.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
softmax.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
sum.cu	202084d31d tests: add gradient tests for all backends (ggml/932)	1 vuosi sitten
sum.cuh	202084d31d tests: add gradient tests for all backends (ggml/932)	1 vuosi sitten
sumrows.cu	231cff5f6f sync : ggml	1 vuosi sitten
sumrows.cuh	231cff5f6f sync : ggml	1 vuosi sitten
tsembd.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
tsembd.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
unary.cu	202084d31d tests: add gradient tests for all backends (ggml/932)	1 vuosi sitten
unary.cuh	202084d31d tests: add gradient tests for all backends (ggml/932)	1 vuosi sitten
upscale.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
upscale.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	1 vuosi sitten
vecdotq.cuh	69c487f4ed CUDA: MMQ code deduplication + iquant support (#8495)	1 vuosi sitten