// op_addrow.comp

  #version 450

  #include "common.comp"

  // A workgroup holds a single invocation; Y and Z are written out at their
  // default value of 1.
  layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

  // First addend: read-only floats, indexed by main() as element + pcs.inAOff.
  layout(binding = 0) buffer restrict readonly tensorInA
  {
      float inA[];
  };

  // Second addend: read-only floats, indexed by main() as
  // (element % pcs.row) + pcs.inBOff.
  layout(binding = 1) buffer restrict readonly tensorInB
  {
      float inB[];
  };

  // Destination: write-only floats, indexed by main() as element + pcs.outOff.
  layout(binding = 2) buffer restrict writeonly tensorOut
  {
      float out_[];
  };

  // Per-dispatch parameters. The three offsets are added directly to float
  // array indices, so they are counted in elements rather than bytes.
  layout(push_constant) uniform PushConstants
  {
      uint inAOff;  // index offset into inA
      uint inBOff;  // index offset into inB
      uint outOff;  // index offset into out_
      uint row;     // modulus applied to the element index before reading inB
  } pcs;
  // Computes out_[e] = inA[e] + inB[e % pcs.row] (each index shifted by its
  // push-constant offset) for the ELEMS_PER_GROUP consecutive elements that
  // start at gl_WorkGroupID.x * ELEMS_PER_GROUP.
  //
  // pcs.row must be non-zero: the result of `%` with a zero right operand is
  // undefined in GLSL. No bounds checks are performed here, so the dispatch
  // presumably has to cover exactly (element count / 4) workgroups -- confirm
  // against the host-side dispatch code.
  void main() {
      const uint ELEMS_PER_GROUP = 4u;
      const uint firstElem = gl_WorkGroupID.x * ELEMS_PER_GROUP;

      for (uint lane = 0u; lane < ELEMS_PER_GROUP; ++lane) {
          const uint elem = firstElem + lane;

          const float lhs = inA[elem + pcs.inAOff];
          // Wrapping the index makes inB repeat every pcs.row elements.
          const float rhs = inB[(elem % pcs.row) + pcs.inBOff];

          out_[elem + pcs.outOff] = lhs + rhs;
      }
  }