cturan/llama.cpp @ 80dd7ff22fd050fed58b552cc8001aaf968b7ebf

SXX 5b359bb1e3 ggml: fix zero division in ‘dne’ calculation in CUDA COUNT_EQUAL operator when ‘ne’ is small (#10213)		hace 1 año
..
template-instances	69c487f4ed CUDA: MMQ code deduplication + iquant support (#8495)	hace 1 año
vendors	c35e586ea5 musa: enable building fat binaries, enable unified memory, and disable Flash Attention on QY1 (MTT S80) (#9526)	hace 1 año
acc.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
acc.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
arange.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
arange.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
argmax.cu	fabdc3bda3 ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)	hace 1 año
argmax.cuh	fabdc3bda3 ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)	hace 1 año
argsort.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	hace 1 año
argsort.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
binbcast.cu	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
binbcast.cuh	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
clamp.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
clamp.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
common.cuh	fabdc3bda3 ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)	hace 1 año
concat.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
concat.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
conv-transpose-1d.cu	fde13b3bb9 feat: cuda implementation for `ggml_conv_transpose_1d` (ggml/854)	hace 1 año
conv-transpose-1d.cuh	fde13b3bb9 feat: cuda implementation for `ggml_conv_transpose_1d` (ggml/854)	hace 1 año
convert.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
convert.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
count-equal.cu	5b359bb1e3 ggml: fix zero division in ‘dne’ calculation in CUDA COUNT_EQUAL operator when ‘ne’ is small (#10213)	hace 1 año
count-equal.cuh	fabdc3bda3 ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)	hace 1 año
cpy.cu	116efee0ee cuda: add q8_0->f32 cpy operation (#9571)	hace 1 año
cpy.cuh	8c60a8a462 increase cuda_cpy block size (ggml/996)	hace 1 año
cross-entropy-loss.cu	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
cross-entropy-loss.cuh	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
dequantize.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
diagmask.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
diagmask.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
dmmv.cu	13dca2a54a Vectorize load instructions in dmmv f16 CUDA kernel (#9816)	hace 1 año
dmmv.cuh	7a11eb3a26 cuda : fix dmmv cols requirement to 2*GGML_CUDA_DMMV_X (#8800)	hace 1 año
fattn-common.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	hace 1 año
fattn-tile-f16.cu	fabdc3bda3 ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)	hace 1 año
fattn-tile-f16.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
fattn-tile-f32.cu	c35e586ea5 musa: enable building fat binaries, enable unified memory, and disable Flash Attention on QY1 (MTT S80) (#9526)	hace 1 año
fattn-tile-f32.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
fattn-vec-f16.cuh	fabdc3bda3 ggml/ex: calculate accuracy in graph, adapt MNIST (ggml/980)	hace 1 año
fattn-vec-f32.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	hace 1 año
fattn-wmma-f16.cuh	e11bd856d5 CPU/CUDA: Gemma 2 FlashAttention support (#8542)	hace 1 año
fattn.cu	841f27abdb metal : optimize FA kernels (#10171)	hace 1 año
fattn.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
getrows.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	hace 1 año
getrows.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
im2col.cu	80273a306d CUDA: fix 1D im2col, add tests (ggml/993)	hace 1 año
im2col.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
mma.cuh	808aba3916 CUDA: optimize and refactor MMQ (#8416)	hace 1 año
mmq.cu	c39665f589 CUDA: fix MMQ for non-contiguous src0, add tests (#10021)	hace 1 año
mmq.cuh	5af118efda CUDA: fix --split-mode row race condition (#9413)	hace 1 año
mmvq.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	hace 1 año
mmvq.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
norm.cu	2d5dd7bb3f ggml : add epsilon as a parameter for group_norm (#8818)	hace 1 año
norm.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
opt-step-adamw.cu	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
opt-step-adamw.cuh	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
out-prod.cu	d13edb17ed ggml : fix builds (#0)	hace 1 año
out-prod.cuh	424c5d00a9 ggml/examples: add backend support for numerical optimization (ggml/949)	hace 1 año
pad.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
pad.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
pool2d.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
pool2d.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
quantize.cu	2b1f616b20 ggml : reduce hash table reset cost (#8698)	hace 1 año
quantize.cuh	808aba3916 CUDA: optimize and refactor MMQ (#8416)	hace 1 año
rope.cu	06943a69f6 ggml : move rope type enum to ggml.h (#8949)	hace 1 año
rope.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
scale.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
scale.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
softmax.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
softmax.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
sum.cu	5cb12f6839 CUDA: fix sum.cu compilation for CUDA < 11.7 (#9562)	hace 1 año
sum.cuh	202084d31d tests: add gradient tests for all backends (ggml/932)	hace 1 año
sumrows.cu	231cff5f6f sync : ggml	hace 1 año
sumrows.cuh	231cff5f6f sync : ggml	hace 1 año
tsembd.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
tsembd.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
unary.cu	2a63caaa69 RWKV v6: RWKV_WKV op CUDA implementation (#9454)	hace 1 año
unary.cuh	2a63caaa69 RWKV v6: RWKV_WKV op CUDA implementation (#9454)	hace 1 año
upscale.cu	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
upscale.cuh	f3f65429c4 llama : reorganize source code + improve CMake (#8006)	hace 1 año
vecdotq.cuh	69c487f4ed CUDA: MMQ code deduplication + iquant support (#8495)	hace 1 año
wkv6.cu	3bcd40b3c5 Optimize RWKV6 Operator Naming and Implement Multi-core CPU/ SYCL Acceleration (#10133)	hace 1 año
wkv6.cuh	3bcd40b3c5 Optimize RWKV6 Operator Naming and Implement Multi-core CPU/ SYCL Acceleration (#10133)	hace 1 año