# FindSIMD.cmake
  1. include(CheckCSourceRuns)
  # AVX probe program: a minimal C translation unit that includes
  # <immintrin.h> and calls the 256-bit intrinsic _mm256_set1_ps.
  # check_sse() looks this variable up as ${type}_CODE when type is "AVX".
  set(AVX_CODE
"
#include <immintrin.h>
int main()
{
__m256 a;
a = _mm256_set1_ps(0);
return 0;
}
"
  )
  # AVX-512 probe program: builds a 512-bit integer vector from 64 byte
  # values with _mm512_set_epi8 and compares it against a copy using
  # _mm512_cmp_epi8_mask, producing a 64-bit mask.
  # check_sse() looks this variable up as ${type}_CODE when type is "AVX512".
  set(AVX512_CODE
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0);
__m512i b = a;
__mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
return 0;
}
"
  )
  # AVX2 probe program: exercises _mm256_abs_epi16 and
  # _mm256_extract_epi64 (the latter is relied on by the AVX2 code path).
  # check_sse() looks this variable up as ${type}_CODE when type is "AVX2".
  #
  # The vector handed to _mm256_extract_epi64 is zero-initialized: the probe
  # is executed by check_c_source_runs, and reading an uninitialized __m256i
  # is undefined behaviour that can also trip -Wuninitialized/-Werror and
  # make the probe fail for reasons unrelated to AVX2 support.
  set(AVX2_CODE "
#include <immintrin.h>
int main()
{
__m256i a = {0};
a = _mm256_abs_epi16(a);
__m256i x = _mm256_setzero_si256();
_mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
return 0;
}
")
  # FMA probe program: performs one fused multiply-add on 256-bit float
  # vectors via _mm256_fmadd_ps, with all operands zero-initialized.
  # check_sse() looks this variable up as ${type}_CODE when type is "FMA".
  set(FMA_CODE
"
#include <immintrin.h>
int main()
{
__m256 acc = _mm256_setzero_ps();
const __m256 d = _mm256_setzero_ps();
const __m256 p = _mm256_setzero_ps();
acc = _mm256_fmadd_ps( d, p, acc );
return 0;
}
"
  )
  # check_sse(<type> <flags>)
  #
  # Determines whether the probe program stored in <type>_CODE compiles and
  # runs with any of the candidate compiler flags.
  #
  #   type  - instruction-set name, e.g. "AVX"; selects the <type>_CODE source
  #           and names the result variables.
  #   flags - semicolon-separated list of candidate flag strings, tried in
  #           order; the first one that works wins.
  #
  # Results (cache entries, marked advanced):
  #   <type>_FOUND - TRUE if some candidate worked, FALSE otherwise.
  #   <type>_FLAGS - the winning flag string, or "" when nothing worked.
  #
  # Each attempt stores its outcome in HAS_<type>_<n>, where <n> is the
  # 1-based position of the candidate in <flags>.
  #
  # Implemented as a function so the loop counter and the temporary
  # CMAKE_REQUIRED_FLAGS value stay local: the caller's CMAKE_REQUIRED_FLAGS
  # is never modified and no scratch variables leak into the calling scope.
  # The results are cache entries and therefore remain visible to the caller.
  function(check_sse type flags)
    set(probe_index 1)
    foreach(candidate_flag ${flags})
      # Stop as soon as support is known (from this run or a cached result).
      if(${type}_FOUND)
        break()
      endif()

      # Local to this function; picked up by check_c_source_runs below.
      set(CMAKE_REQUIRED_FLAGS "${candidate_flag}")
      check_c_source_runs("${${type}_CODE}" HAS_${type}_${probe_index})
      if(HAS_${type}_${probe_index})
        set(${type}_FOUND TRUE CACHE BOOL "${type} support")
        set(${type}_FLAGS "${candidate_flag}" CACHE STRING "${type} flags")
      endif()
      math(EXPR probe_index "${probe_index}+1")
    endforeach()

    # No candidate succeeded: record the negative result explicitly.
    if(NOT ${type}_FOUND)
      set(${type}_FOUND FALSE CACHE BOOL "${type} support")
      set(${type}_FLAGS "" CACHE STRING "${type} flags")
    endif()

    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
  endfunction()
  # Run the probes and derive the LLAMA_* switches from the results.
  #
  # Each candidate list starts with " " (no extra flag); the /arch:* entries
  # are flags for MSVC only.
  #
  # The conditions name the result variables directly instead of expanding
  # them with ${...}: if() dereferences the name itself, and an expanded
  # empty value would otherwise leave a malformed condition such as
  # "if(NOT )".

  check_sse("AVX" " ;/arch:AVX")
  if(AVX_FOUND)
    set(LLAMA_AVX ON)
  else()
    set(LLAMA_AVX OFF)
  endif()

  # LLAMA_AVX2 is enabled only when both the AVX2 and the FMA probes pass.
  check_sse("AVX2" " ;/arch:AVX2")
  check_sse("FMA" " ;/arch:AVX2")
  if(AVX2_FOUND AND FMA_FOUND)
    set(LLAMA_AVX2 ON)
  else()
    set(LLAMA_AVX2 OFF)
  endif()

  check_sse("AVX512" " ;/arch:AVX512")
  if(AVX512_FOUND)
    set(LLAMA_AVX512 ON)
  else()
    set(LLAMA_AVX512 OFF)
  endif()