sqrt.cl 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. #pragma OPENCL EXTENSION cl_khr_fp16 : enable
  // Element-wise square root over an f32 buffer.
  //
  // src0    : input buffer base pointer (only read)
  // offset0 : byte offset added to src0 before indexing
  // dst     : output buffer base pointer
  // offsetd : byte offset added to dst before indexing
  //
  // The work-item with global id i in dimension 0 computes
  // dst[i] = sqrt(src0[i]). There is no element-count argument and no bounds
  // check, so the global size of the launch decides how many elements are
  // touched.
  // NOTE(review): presumably the host enqueues exactly one work-item per
  // element -- confirm against the caller.
  kernel void kernel_sqrt_cont_f32(
      global const float * src0,
      ulong offset0,
      global float * dst,
      ulong offsetd
  ) {
      // Offsets are expressed in bytes, so apply them through a char pointer
      // before going back to the element type.
      src0 = (global const float *)((global const char *)src0 + offset0);
      dst  = (global float *)((global char *)dst + offsetd);

      // get_global_id() returns size_t; keeping that width avoids wrapping the
      // index when the global size exceeds the range of uint.
      const size_t gid = get_global_id(0);
      dst[gid] = sqrt(src0[gid]);
  }
  // Element-wise square root over an f32 buffer, four lanes per work-item.
  //
  // src0    : input buffer base pointer, viewed as float4 (only read)
  // offset0 : byte offset added to src0 before indexing
  // dst     : output buffer base pointer, viewed as float4
  // offsetd : byte offset added to dst before indexing
  //
  // The work-item with global id i in dimension 0 computes
  // dst[i] = sqrt(src0[i]) on one float4, i.e. the index counts float4
  // elements, not scalars. There is no element-count argument and no bounds
  // check, so the global size of the launch decides how many vectors are
  // touched.
  // NOTE(review): the offset pointers are dereferenced as float4, so the
  // offsets presumably preserve float4 alignment -- confirm against the caller.
  kernel void kernel_sqrt_cont_f32_4(
      global const float4 * src0,
      ulong offset0,
      global float4 * dst,
      ulong offsetd
  ) {
      // Offsets are expressed in bytes, so apply them through a char pointer
      // before going back to the vector type.
      src0 = (global const float4 *)((global const char *)src0 + offset0);
      dst  = (global float4 *)((global char *)dst + offsetd);

      // get_global_id() returns size_t; keeping that width avoids wrapping the
      // index when the global size exceeds the range of uint.
      const size_t gid = get_global_id(0);
      dst[gid] = sqrt(src0[gid]);
  }
  // Element-wise square root over an f16 buffer.
  //
  // src0    : input buffer base pointer (only read)
  // offset0 : byte offset added to src0 before indexing
  // dst     : output buffer base pointer
  // offsetd : byte offset added to dst before indexing
  //
  // The work-item with global id i in dimension 0 reads src0[i], widens it to
  // float, takes the square root in float, and stores the result narrowed back
  // to half in dst[i]. There is no element-count argument and no bounds check,
  // so the global size of the launch decides how many elements are touched.
  // NOTE(review): presumably the host enqueues exactly one work-item per
  // element -- confirm against the caller.
  kernel void kernel_sqrt_cont_f16(
      global const half * src0,
      ulong offset0,
      global half * dst,
      ulong offsetd
  ) {
      // Offsets are expressed in bytes, so apply them through a char pointer
      // before going back to the element type.
      src0 = (global const half *)((global const char *)src0 + offset0);
      dst  = (global half *)((global char *)dst + offsetd);

      // get_global_id() returns size_t; keeping that width avoids wrapping the
      // index when the global size exceeds the range of uint.
      const size_t gid = get_global_id(0);

      // The math itself runs in float; half is only the storage format here.
      const float root = sqrt(convert_float(src0[gid]));
      dst[gid] = convert_half(root);
  }
  // Element-wise square root over an f16 buffer, four lanes per work-item.
  //
  // src0    : input buffer base pointer, viewed as half4 (only read)
  // offset0 : byte offset added to src0 before indexing
  // dst     : output buffer base pointer, viewed as half4
  // offsetd : byte offset added to dst before indexing
  //
  // The work-item with global id i in dimension 0 reads one half4 from
  // src0[i], widens it to float4, takes the square root in float, and stores
  // the result narrowed back to half4 in dst[i]. The index counts half4
  // elements, not scalars. There is no element-count argument and no bounds
  // check, so the global size of the launch decides how many vectors are
  // touched.
  // NOTE(review): the offset pointers are dereferenced as half4, so the
  // offsets presumably preserve half4 alignment -- confirm against the caller.
  kernel void kernel_sqrt_cont_f16_4(
      global const half4 * src0,
      ulong offset0,
      global half4 * dst,
      ulong offsetd
  ) {
      // Offsets are expressed in bytes, so apply them through a char pointer
      // before going back to the vector type.
      src0 = (global const half4 *)((global const char *)src0 + offset0);
      dst  = (global half4 *)((global char *)dst + offsetd);

      // get_global_id() returns size_t; keeping that width avoids wrapping the
      // index when the global size exceeds the range of uint.
      const size_t gid = get_global_id(0);

      // The math itself runs in float; half is only the storage format here.
      const float4 root = sqrt(convert_float4(src0[gid]));
      dst[gid] = convert_half4(root);
  }