1
0

spec.go 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. package convert
  2. import (
  3. "makarna/pkg/quant"
  4. )
  5. type Spec struct {
  6. Architecture string
  7. TieWordEmbeddings bool
  8. BaseQuant quant.QuantType
  9. MixMode bool
  10. SkipTensor func(name string) bool
  11. IsQuantizable func(name string, shape []int, baseQuant quant.QuantType) bool
  12. ResolveQuant func(name string, baseQuant quant.QuantType) quant.QuantType
  13. }
  14. func defaultSpec(architecture string, tieWordEmbeddings bool, baseQuant quant.QuantType, mixMode bool) *Spec {
  15. s := &Spec{
  16. Architecture: architecture,
  17. TieWordEmbeddings: tieWordEmbeddings,
  18. BaseQuant: baseQuant,
  19. MixMode: mixMode,
  20. }
  21. s.SkipTensor = func(name string) bool {
  22. return false
  23. }
  24. s.IsQuantizable = func(_ string, shape []int, baseQuant quant.QuantType) bool {
  25. if baseQuant == "" {
  26. return false
  27. }
  28. if len(shape) < 2 {
  29. return false
  30. }
  31. last := shape[len(shape)-1]
  32. return last%256 == 0
  33. }
  34. s.ResolveQuant = func(_ string, baseQuant quant.QuantType) quant.QuantType { return baseQuant }
  35. return s
  36. }
  37. // NewSpec creates a conversion spec and applies a registered model plugin (if any).
  38. func NewSpec(architecture string, tieWordEmbeddings bool, baseQuant quant.QuantType, mixMode bool) *Spec {
  39. s := defaultSpec(architecture, tieWordEmbeddings, baseQuant, mixMode)
  40. if p := pluginForArchitecture(architecture); p != nil {
  41. p.Apply(s)
  42. }
  43. return s
  44. }