| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172 |
- package nn
import (
	"fmt"
	"math"

	"makarna/pkg/backend/cpu"
)
- // SiLU applies x * sigmoid(x) in-place using the fastest available kernel.
- func SiLU(x *cpu.Tensor) error {
- siluInplace(x.DataFloat32())
- return nil
- }
- // SwiGLU: out = SiLU(gate) * up. Does not mutate gate.
- func SwiGLU(gate, up, out *cpu.Tensor) error {
- gData := gate.DataFloat32()
- uData := up.DataFloat32()
- oData := out.DataFloat32()
- if len(oData) == 0 {
- return nil
- }
- if &gData[0] != &oData[0] {
- copy(oData, gData)
- }
- siluInplace(oData)
- for i := range oData {
- oData[i] *= uData[i]
- }
- return nil
- }
- // siluInplace selects the SIMD kernel when available, falling back to scalar.
- func siluInplace(data []float32) {
- if len(data) == 0 {
- return
- }
- switch {
- case hasSiLUAVX512 && cpu.SupportsAVX512():
- main := len(data) &^ 15
- if main > 0 {
- siluAVX512Asm(&data[0], main)
- }
- if main == len(data) {
- return
- }
- data = data[main:]
- case hasSiLUAVX2 && cpu.SupportsAVX2():
- main := len(data) &^ 7
- if main > 0 {
- siluAVX2Asm(&data[0], main)
- }
- if main == len(data) {
- return
- }
- data = data[main:]
- }
- siluScalar(data)
- }
// siluScalar is the portable fallback: it overwrites each element x with
// x * sigmoid(x), written as x / (1 + exp(-x)). The exponential is taken in
// float64 and narrowed before the division, matching the SIMD kernels'
// reference behavior.
func siluScalar(data []float32) {
	for i, x := range data {
		data[i] = x / (1.0 + float32(math.Exp(float64(-x))))
	}
}
|