#pragma once

#include "ggml-backend.h"
#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

// Delta-Net gated linear attention
// Implements the gated delta rule computation used by Delta-Net linear attention layers.
// k, v, q, g:    [S, H, n_tokens, n_seqs] - key, value, query and gate tensors
// beta:          [H, n_tokens, n_seqs]    - beta parameter for the delta rule
// state:         [S, S, H, n_seqs]        - recurrent state tensor
// use_qk_l2norm: whether to apply L2 normalization to query and key
// scale:         attention scaling factor
GGML_API struct ggml_tensor * ggml_delta_net(
        struct ggml_context * ctx,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * q,
        struct ggml_tensor  * g,
        struct ggml_tensor  * beta,
        struct ggml_tensor  * state,
        bool                  use_qk_l2norm,
        float                 scale);
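
// Delta-Net core operation; takes the same tensor arguments as ggml_delta_net,
// but note that the q/k/v parameter order differs.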
GGML_API struct ggml_tensor * ggml_delta_net_op(
        struct ggml_context * ctx,
        struct ggml_tensor  * q,
        struct ggml_tensor  * k,
        struct ggml_tensor  * v,
        struct ggml_tensor  * g,
        struct ggml_tensor  * beta,
        struct ggml_tensor  * state,
        bool                  use_qk_l2norm,
        float                 scale);

#ifdef __cplusplus
}
#endif
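
// Example usage (illustrative sketch only, kept in a comment so the header stays
// declaration-only): builds a ggml_delta_net graph node. The concrete dimensions
// S, H, n_tokens, n_seqs and the 1/sqrt(S) scale below are assumptions chosen to
// match the shapes documented above, not values prescribed by this API.
//
//     #include <math.h>
//
//     static struct ggml_tensor * build_delta_net_example(struct ggml_context * ctx) {
//         const int64_t S = 64, H = 8, n_tokens = 16, n_seqs = 1;
//
//         // k, v, q, g: [S, H, n_tokens, n_seqs]
//         struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
//         struct ggml_tensor * v = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
//         struct ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
//         struct ggml_tensor * g = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
//
//         // beta: [H, n_tokens, n_seqs], state: [S, S, H, n_seqs]
//         struct ggml_tensor * beta  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, H, n_tokens, n_seqs);
//         struct ggml_tensor * state = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, S, H, n_seqs);
//
//         // apply q/k L2 normalization inside the op; 1/sqrt(S) is a typical attention scale
//         return ggml_delta_net(ctx, k, v, q, g, beta, state, /*use_qk_l2norm=*/true,
//                               1.0f / sqrtf((float) S));
//     }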