| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
- #if !defined(DATA_A_F32) && !defined(DATA_A_F16) // any configuration other than DATA_A_F32 / DATA_A_F16
- #extension GL_EXT_shader_explicit_arithmetic_types_int8 : require // the block structs in this file declare uint8_t / int8_t members
- #endif
- #if defined(DATA_A_F32) // 32-bit float data: no block struct, A_TYPE is a float scalar/vector/matrix
- #define QUANT_K 1 // presumably elements per block; 1 for this unquantized type - TODO confirm against users of QUANT_K
- #define QUANT_R 1 // same value as the other type with one storage element per value (Q8_0) - TODO confirm exact meaning
- #ifndef LOAD_VEC_A // no load width requested: scalar element type
- #define A_TYPE float
- #elif LOAD_VEC_A == 4 // 4 floats per A_TYPE
- #define A_TYPE vec4
- #elif LOAD_VEC_A == 8 // 8 floats per A_TYPE (mat2x4 = 2 columns of 4 floats)
- #define A_TYPE mat2x4
- #endif // NOTE(review): any other LOAD_VEC_A value leaves A_TYPE undefined by this section
- #endif
- #if defined(DATA_A_F16) // 16-bit float data: no block struct, A_TYPE is an fp16 scalar/vector/matrix
- #define QUANT_K 1 // presumably elements per block; 1 for this unquantized type - TODO confirm against users of QUANT_K
- #define QUANT_R 1 // same value as the other type with one storage element per value (Q8_0) - TODO confirm exact meaning
- #ifndef LOAD_VEC_A // no load width requested: scalar element type
- #define A_TYPE float16_t
- #elif LOAD_VEC_A == 4 // 4 fp16 values per A_TYPE
- #define A_TYPE f16vec4
- #elif LOAD_VEC_A == 8 // 8 fp16 values per A_TYPE (f16mat2x4 = 2 columns of 4 fp16 values)
- #define A_TYPE f16mat2x4
- #endif // NOTE(review): any other LOAD_VEC_A value leaves A_TYPE undefined by this section; no fp16 extension is enabled here either
- #endif
- #if defined(DATA_A_Q4_0) // Q4_0: declares block_q4_0 and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d)
- #define QUANT_K 32 // presumably values per block - TODO confirm
- #define QUANT_R 2 // QUANT_K / QUANT_R == 16 == length of qs; presumably two 4-bit values per byte - TODO confirm
- struct block_q4_0 // member order and sizes form the buffer layout; do not reorder
- {
- float16_t d; // one fp16 per block; presumably the scale - TODO confirm
- uint8_t qs[16]; // 16 bytes of packed quantized values
- };
- #define A_TYPE block_q4_0
- #endif
- #if defined(DATA_A_Q4_1) // Q4_1: declares block_q4_1 and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d, m)
- #define QUANT_K 32 // presumably values per block - TODO confirm
- #define QUANT_R 2 // QUANT_K / QUANT_R == 16 == length of qs; presumably two 4-bit values per byte - TODO confirm
- struct block_q4_1 // member order and sizes form the buffer layout; do not reorder
- {
- float16_t d; // first fp16 per block; presumably the scale - TODO confirm
- float16_t m; // second fp16 per block; presumably a minimum/offset - TODO confirm
- uint8_t qs[16]; // 16 bytes of packed quantized values
- };
- #define A_TYPE block_q4_1
- #endif
- #if defined(DATA_A_Q5_0) // Q5_0: declares block_q5_0 and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d, uint16_t qh)
- #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require // uint16_t type used by qh
- #define QUANT_K 32 // presumably values per block - TODO confirm
- #define QUANT_R 2 // QUANT_K / QUANT_R == 16 == length of qs - TODO confirm exact meaning
- struct block_q5_0 // member order and sizes form the buffer layout; do not reorder
- {
- float16_t d; // one fp16 per block; presumably the scale - TODO confirm
- uint16_t qh[2]; // 32 bits as two 16-bit words; presumably one extra (high) bit per value - TODO confirm
- uint8_t qs[16]; // 16 bytes of packed quantized values
- };
- #define A_TYPE block_q5_0
- #endif
- #if defined(DATA_A_Q5_1) // Q5_1: declares block_q5_1 and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d, m)
- #extension GL_EXT_shader_explicit_arithmetic_types_int16 : require // NOTE(review): no 16-bit integer member is declared in this struct; possibly needed by code using it - confirm
- #define QUANT_K 32 // presumably values per block - TODO confirm
- #define QUANT_R 2 // QUANT_K / QUANT_R == 16 == length of qs - TODO confirm exact meaning
- struct block_q5_1 // member order and sizes form the buffer layout; do not reorder
- {
- float16_t d; // first fp16 per block; presumably the scale - TODO confirm
- float16_t m; // second fp16 per block; presumably a minimum/offset - TODO confirm
- uint qh; // 32 bits in a single word (Q5_0 uses uint16_t[2] instead); presumably one extra (high) bit per value - TODO confirm
- uint8_t qs[16]; // 16 bytes of packed quantized values
- };
- #define A_TYPE block_q5_1
- #endif
- #if defined(DATA_A_Q8_0) // Q8_0: declares block_q8_0 and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d)
- #define QUANT_K 32 // matches the 32 entries of qs; presumably values per block - TODO confirm
- #define QUANT_R 1 // QUANT_K / QUANT_R == 32 == length of qs (one signed byte per entry)
- struct block_q8_0 // member order and sizes form the buffer layout; do not reorder
- {
- float16_t d; // one fp16 per block; presumably the scale - TODO confirm
- int8_t qs[32]; // 32 signed 8-bit quantized values
- };
- #define A_TYPE block_q8_0
- #endif
- // K-quants: block formats with QUANT_K = 256 elements per block
- #if defined(DATA_A_Q2_K) // Q2_K: declares block_q2_K and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (f16vec2 d)
- #define QUANT_K 256 // sizes the arrays below; presumably values per block - TODO confirm. No QUANT_R is defined for this type
- struct block_q2_K // member order and sizes form the buffer layout; do not reorder
- {
- uint8_t scales[QUANT_K/16]; // 16 bytes: one byte per 16 values
- uint8_t qs[QUANT_K/4]; // 64 bytes: a quarter byte (2 bits) per value
- f16vec2 d; // two fp16 values per block; presumably scale and minimum - TODO confirm
- };
- #define A_TYPE block_q2_K
- #endif
- #if defined(DATA_A_Q3_K) // Q3_K: declares block_q3_K and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d)
- #define QUANT_K 256 // sizes the arrays below; presumably values per block - TODO confirm. No QUANT_R is defined for this type
- struct block_q3_K // member order and sizes form the buffer layout; do not reorder
- {
- uint8_t hmask[QUANT_K/8]; // 32 bytes: one bit per value; presumably the high bit of each 3-bit value - TODO confirm
- uint8_t qs[QUANT_K/4]; // 64 bytes: 2 bits per value
- uint8_t scales[12]; // 12 bytes of scale data; packing is not visible here
- float16_t d; // one fp16 per block; presumably the overall scale - TODO confirm
- };
- #define A_TYPE block_q3_K
- #endif
- #if defined(DATA_A_Q4_K) // Q4_K: declares block_q4_K and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (f16vec2 d)
- #define QUANT_K 256 // sizes the arrays below; presumably values per block - TODO confirm. No QUANT_R is defined for this type
- struct block_q4_K // member order and sizes form the buffer layout; do not reorder
- {
- f16vec2 d; // two fp16 values per block; presumably scale and minimum - TODO confirm
- uint8_t scales[3*QUANT_K/64]; // 12 bytes of scale data; packing is not visible here
- uint8_t qs[QUANT_K/2]; // 128 bytes: half a byte (4 bits) per value
- };
- #define A_TYPE block_q4_K
- #endif
- #if defined(DATA_A_Q5_K) // Q5_K: declares block_q5_K and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (f16vec2 d)
- #define QUANT_K 256 // sizes the arrays below; presumably values per block - TODO confirm. No QUANT_R is defined for this type
- struct block_q5_K // member order and sizes form the buffer layout; do not reorder
- {
- f16vec2 d; // two fp16 values per block; presumably scale and minimum - TODO confirm
- uint8_t scales[12]; // 12 bytes of scale data (same count as 3*QUANT_K/64 in Q4_K); packing is not visible here
- uint8_t qh[QUANT_K/8]; // 32 bytes: one bit per value; presumably the high bit of each 5-bit value - TODO confirm
- uint8_t qs[QUANT_K/2]; // 128 bytes: 4 bits per value
- };
- #define A_TYPE block_q5_K
- #endif
- #if defined(DATA_A_Q6_K) // Q6_K: declares block_q6_K and selects it as A_TYPE
- #extension GL_EXT_shader_16bit_storage : require // 16-bit types in buffer storage (float16_t d)
- #define QUANT_K 256 // sizes the arrays below; presumably values per block - TODO confirm. No QUANT_R is defined for this type
- struct block_q6_K // member order and sizes form the buffer layout; do not reorder
- {
- uint8_t ql[QUANT_K/2]; // 128 bytes: 4 bits per value; presumably the low bits - TODO confirm
- uint8_t qh[QUANT_K/4]; // 64 bytes: 2 bits per value; presumably the high bits - TODO confirm
- int8_t scales[QUANT_K/16]; // 16 signed bytes: one per 16 values
- float16_t d; // one fp16 per block; presumably the overall scale - TODO confirm
- };
- #define A_TYPE block_q6_K
- #endif
|