# CMakeLists.txt (21 KB)
  # Configure one variant of the ggml CPU backend library.
  #
  # Arguments:
  #   tag_name - optional variant suffix. When it evaluates to true the target is
  #              named ggml-cpu-<tag_name>, otherwise it is named ggml-cpu.
  #
  # What the function does, in order:
  #   1. creates the target through ggml_add_backend_library() (defined elsewhere)
  #      and collects the CPU backend sources in GGML_CPU_SOURCES;
  #   2. wires the optional dependencies selected by GGML_ACCELERATE, GGML_OPENMP,
  #      GGML_LLAMAFILE and GGML_CPU_HBM;
  #   3. derives ARCH_FLAGS / ARCH_DEFINITIONS for the detected processor family
  #      (ARM, x86, PowerPC, loongarch64, RISC-V, s390x);
  #   4. optionally fetches KleidiAI and adds its kernels (GGML_CPU_KLEIDIAI);
  #   5. applies sources, flags and definitions to the target and, for
  #      GGML_BACKEND_DL builds, adds a separate "<name>-feats" object library.
  function(ggml_add_cpu_backend_variant_impl tag_name)
      if (tag_name)
          set(GGML_CPU_NAME ggml-cpu-${tag_name})
      else()
          set(GGML_CPU_NAME ggml-cpu)
      endif()

      ggml_add_backend_library(${GGML_CPU_NAME})

      list (APPEND GGML_CPU_SOURCES
          ggml-cpu/ggml-cpu.c
          ggml-cpu/ggml-cpu.cpp
          ggml-cpu/ggml-cpu-aarch64.cpp
          ggml-cpu/ggml-cpu-aarch64.h
          ggml-cpu/ggml-cpu-hbm.cpp
          ggml-cpu/ggml-cpu-hbm.h
          ggml-cpu/ggml-cpu-quants.c
          ggml-cpu/ggml-cpu-quants.h
          ggml-cpu/ggml-cpu-traits.cpp
          ggml-cpu/ggml-cpu-traits.h
          ggml-cpu/amx/amx.cpp
          ggml-cpu/amx/amx.h
          ggml-cpu/amx/mmq.cpp
          ggml-cpu/amx/mmq.h
          ggml-cpu/ggml-cpu-impl.h
          )

      target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
      target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)

      # --- Optional dependencies ------------------------------------------------

      if (APPLE AND GGML_ACCELERATE)
          find_library(ACCELERATE_FRAMEWORK Accelerate)
          if (ACCELERATE_FRAMEWORK)
              message(STATUS "Accelerate framework found")

              target_compile_definitions(${GGML_CPU_NAME} PRIVATE
                  GGML_USE_ACCELERATE
                  ACCELERATE_NEW_LAPACK
                  ACCELERATE_LAPACK_ILP64)

              target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
          else()
              message(WARNING "Accelerate framework not found")
          endif()
      endif()

      if (GGML_OPENMP)
          find_package(OpenMP)
          if (OpenMP_FOUND)
              target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)

              target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
          else()
              message(WARNING "OpenMP not found")
          endif()
      endif()

      if (GGML_LLAMAFILE)
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_LLAMAFILE)

          list(APPEND GGML_CPU_SOURCES
              ggml-cpu/llamafile/sgemm.cpp
              ggml-cpu/llamafile/sgemm.h)
      endif()

      if (GGML_CPU_HBM)
          find_library(memkind memkind REQUIRED)

          message(STATUS "Using memkind for CPU HBM")
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
          target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
      endif()

      # --- Architecture specific flags ------------------------------------------

      if (CMAKE_OSX_ARCHITECTURES      STREQUAL "arm64" OR
          CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
          (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
           CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))

          message(STATUS "ARM detected")

          if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
              message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
          else()
              # Make the check_* commands used below available even when no
              # enclosing script has included these modules yet.
              include(CheckCXXCompilerFlag)
              include(CheckCXXSourceCompiles)
              include(CheckCXXSourceRuns)

              check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
              if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
                  list(APPEND ARCH_FLAGS -mfp16-format=ieee)
              endif()

              # Empty input for the compiler probes below; the same file is used
              # for the -mcpu=native probe and for the feature dump.
              if (CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
                  set(FEAT_INPUT_FILE "NUL")
              else()
                  set(FEAT_INPUT_FILE "/dev/null")
              endif()

              if (GGML_NATIVE)
                  # -mcpu=native does not always enable all the features in some compilers,
                  # so we check for them manually and enable them if available

                  # The verbose (-v) compiler output is captured from stderr and
                  # searched for the -mcpu=... value the compiler resolved.
                  execute_process(
                      COMMAND ${CMAKE_C_COMPILER} -mcpu=native -E -v -
                      INPUT_FILE "${FEAT_INPUT_FILE}"
                      OUTPUT_QUIET
                      ERROR_VARIABLE ARM_MCPU
                      RESULT_VARIABLE ARM_MCPU_RESULT
                  )
                  if (NOT ARM_MCPU_RESULT)
                      string(REGEX MATCH "-mcpu=[^ ']+" ARM_MCPU_FLAG "${ARM_MCPU}")
                  endif()
                  if ("${ARM_MCPU_FLAG}" STREQUAL "")
                      set(ARM_MCPU_FLAG -mcpu=native)
                      message(STATUS "ARM -mcpu not found, -mcpu=native will be used")
                  endif()

                  # check_arm_feature(<tag> <code>)
                  #
                  # Tries to build and run <code> with "${ARM_MCPU_FLAG}+<tag>".
                  # On success "+<tag>" is appended to ARM_MCPU_FLAG_FIX in the
                  # caller's scope. Otherwise, if the compiler accepts
                  # "${ARM_MCPU_FLAG}+no<tag>", "+no<tag>" is appended instead.
                  function(check_arm_feature tag code)
                      set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
                      set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+${tag}")
                      check_cxx_source_runs("${code}" GGML_MACHINE_SUPPORTS_${tag})
                      if (GGML_MACHINE_SUPPORTS_${tag})
                          set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+${tag}" PARENT_SCOPE)
                      else()
                          set(CMAKE_REQUIRED_FLAGS "${ARM_MCPU_FLAG}+no${tag}")
                          check_cxx_source_compiles("int main() { return 0; }" GGML_MACHINE_SUPPORTS_no${tag})
                          if (GGML_MACHINE_SUPPORTS_no${tag})
                              set(ARM_MCPU_FLAG_FIX "${ARM_MCPU_FLAG_FIX}+no${tag}" PARENT_SCOPE)
                          endif()
                      endif()
                      set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
                  endfunction()

                  check_arm_feature(dotprod "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }")
                  check_arm_feature(i8mm    "#include <arm_neon.h>\nint main() { int8x16_t _a, _b; volatile int32x4_t _s = vmmlaq_s32(_s, _a, _b); return 0; }")
                  check_arm_feature(sve     "#include <arm_sve.h>\nint main()  { svfloat32_t _a, _b; volatile svfloat32_t _c = svadd_f32_z(svptrue_b8(), _a, _b); return 0; }")
                  check_arm_feature(sme     "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")

                  list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
              else()
                  if (GGML_CPU_ARM_ARCH)
                      list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
                  endif()
              endif()

              # show enabled features
              # (dump the predefined macros for the chosen flags and look for
              # "__ARM_FEATURE_<X> 1" entries)
              execute_process(
                  COMMAND ${CMAKE_C_COMPILER} ${ARCH_FLAGS} -dM -E -
                  INPUT_FILE ${FEAT_INPUT_FILE}
                  OUTPUT_VARIABLE ARM_FEATURE
                  RESULT_VARIABLE ARM_FEATURE_RESULT
              )
              if (ARM_FEATURE_RESULT)
                  message(WARNING "Failed to get ARM features")
              else()
                  foreach(feature DOTPROD SVE MATMUL_INT8 FMA FP16_VECTOR_ARITHMETIC SME)
                      string(FIND "${ARM_FEATURE}" "__ARM_FEATURE_${feature} 1" feature_pos)
                      if (NOT ${feature_pos} EQUAL -1)
                          message(STATUS "ARM feature ${feature} enabled")
                      endif()
                  endforeach()
              endif()
          endif()
      elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
              (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
               CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))

          message(STATUS "x86 detected")

          if (MSVC)
              # instruction set detection for MSVC only
              if (GGML_NATIVE)
                  include(ggml-cpu/cmake/FindSIMD.cmake)
              endif ()
              if (GGML_AVX512)
                  list(APPEND ARCH_FLAGS /arch:AVX512)
                  # /arch:AVX512 includes: __AVX512F__, __AVX512CD__, __AVX512BW__, __AVX512DQ__, and __AVX512VL__
                  # MSVC has no compile-time flags enabling specific
                  # AVX512 extensions, neither it defines the
                  # macros corresponding to the extensions.
                  # Do it manually.
                  list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                  if (GGML_AVX512_VBMI)
                      list(APPEND ARCH_DEFINITIONS __AVX512VBMI__)
                      if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                          list(APPEND ARCH_FLAGS -mavx512vbmi)
                      endif()
                  endif()
                  if (GGML_AVX512_VNNI)
                      list(APPEND ARCH_DEFINITIONS __AVX512VNNI__ GGML_AVX512_VNNI)
                      if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                          list(APPEND ARCH_FLAGS -mavx512vnni)
                      endif()
                  endif()
                  if (GGML_AVX512_BF16)
                      list(APPEND ARCH_DEFINITIONS __AVX512BF16__ GGML_AVX512_BF16)
                      if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
                          list(APPEND ARCH_FLAGS -mavx512bf16)
                      endif()
                  endif()
                  if (GGML_AMX_TILE)
                      list(APPEND ARCH_DEFINITIONS __AMX_TILE__ GGML_AMX_TILE)
                  endif()
                  if (GGML_AMX_INT8)
                      list(APPEND ARCH_DEFINITIONS __AMX_INT8__ GGML_AMX_INT8)
                  endif()
                  if (GGML_AMX_BF16)
                      list(APPEND ARCH_DEFINITIONS __AMX_BF16__ GGML_AMX_BF16)
                  endif()
              elseif (GGML_AVX2)
                  list(APPEND ARCH_FLAGS /arch:AVX2)
                  list(APPEND ARCH_DEFINITIONS GGML_AVX2 GGML_FMA GGML_F16C)
              elseif (GGML_AVX)
                  list(APPEND ARCH_FLAGS /arch:AVX)
                  list(APPEND ARCH_DEFINITIONS GGML_AVX)
              else ()
                  list(APPEND ARCH_FLAGS /arch:SSE4.2)
                  list(APPEND ARCH_DEFINITIONS GGML_SSE42)
              endif()
              if (GGML_AVX_VNNI)
                  list(APPEND ARCH_DEFINITIONS __AVXVNNI__ GGML_AVX_VNNI)
              endif()
              if (GGML_BMI2)
                  # MSVC does not define macro __BMI2__
                  list(APPEND ARCH_DEFINITIONS __BMI2__ GGML_BMI2)
              endif()
          else ()
              if (GGML_NATIVE)
                  list(APPEND ARCH_FLAGS -march=native)
              else ()
                  # SSE4.2 is always enabled here; every other extension
                  # is opt-in through its GGML_* option.
                  list(APPEND ARCH_FLAGS -msse4.2)
                  list(APPEND ARCH_DEFINITIONS GGML_SSE42)
                  if (GGML_F16C)
                      list(APPEND ARCH_FLAGS -mf16c)
                      list(APPEND ARCH_DEFINITIONS GGML_F16C)
                  endif()
                  if (GGML_FMA)
                      list(APPEND ARCH_FLAGS -mfma)
                      list(APPEND ARCH_DEFINITIONS GGML_FMA)
                  endif()
                  if (GGML_BMI2)
                      list(APPEND ARCH_FLAGS -mbmi2)
                      list(APPEND ARCH_DEFINITIONS GGML_BMI2)
                  endif()
                  if (GGML_AVX)
                      list(APPEND ARCH_FLAGS -mavx)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX)
                  endif()
                  if (GGML_AVX2)
                      list(APPEND ARCH_FLAGS -mavx2)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX2)
                  endif()
                  if (GGML_AVX_VNNI)
                      list(APPEND ARCH_FLAGS -mavxvnni)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX_VNNI)
                  endif()
                  if (GGML_AVX512)
                      list(APPEND ARCH_FLAGS
                          -mavx512f
                          -mavx512cd
                          -mavx512vl
                          -mavx512dq
                          -mavx512bw)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX512)
                  endif()
                  if (GGML_AVX512_VBMI)
                      list(APPEND ARCH_FLAGS -mavx512vbmi)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX512_VBMI)
                  endif()
                  if (GGML_AVX512_VNNI)
                      list(APPEND ARCH_FLAGS -mavx512vnni)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX512_VNNI)
                  endif()
                  if (GGML_AVX512_BF16)
                      list(APPEND ARCH_FLAGS -mavx512bf16)
                      list(APPEND ARCH_DEFINITIONS GGML_AVX512_BF16)
                  endif()
                  if (GGML_AMX_TILE)
                      list(APPEND ARCH_FLAGS -mamx-tile)
                      list(APPEND ARCH_DEFINITIONS GGML_AMX_TILE)
                  endif()
                  if (GGML_AMX_INT8)
                      list(APPEND ARCH_FLAGS -mamx-int8)
                      list(APPEND ARCH_DEFINITIONS GGML_AMX_INT8)
                  endif()
                  if (GGML_AMX_BF16)
                      list(APPEND ARCH_FLAGS -mamx-bf16)
                      list(APPEND ARCH_DEFINITIONS GGML_AMX_BF16)
                  endif()
              endif()
          endif()
      elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
          message(STATUS "PowerPC detected")
          # Read a textual CPU description and pull the POWER generation
          # number out of it.
          if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64")
              file(READ "/proc/cpuinfo" POWER10_M)
          elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "powerpc")
              execute_process(COMMAND bash -c "prtconf |grep 'Implementation' | head -n 1" OUTPUT_VARIABLE POWER10_M)
          endif()

          string(REGEX MATCHALL "POWER *([0-9]+)" MATCHED_STRING "${POWER10_M}")
          string(REGEX REPLACE "POWER *([0-9]+)" "\\1" EXTRACTED_NUMBER "${MATCHED_STRING}")

          if (EXTRACTED_NUMBER GREATER_EQUAL 10)
              list(APPEND ARCH_FLAGS -mcpu=power10 -mpowerpc64)
          elseif (EXTRACTED_NUMBER EQUAL 9)
              list(APPEND ARCH_FLAGS -mcpu=power9 -mpowerpc64)
          elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc64le")
              list(APPEND ARCH_FLAGS -mcpu=powerpc64le -mtune=native)
          else()
              list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64)
          endif()
      # The processor name is quoted in the remaining branches so that an empty
      # CMAKE_SYSTEM_PROCESSOR falls through to "Unknown architecture" instead of
      # producing a malformed condition.
      elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "loongarch64")
          message(STATUS "loongarch64 detected")

          list(APPEND ARCH_FLAGS -march=loongarch64)
          if (GGML_LASX)
              list(APPEND ARCH_FLAGS -mlasx)
          endif()
          if (GGML_LSX)
              list(APPEND ARCH_FLAGS -mlsx)
          endif()
      elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "riscv64")
          message(STATUS "RISC-V detected")
          if (GGML_RVV)
              list(APPEND ARCH_FLAGS -march=rv64gcv -mabi=lp64d)
          endif()
      elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "s390x")
          message(STATUS "s390x detected")
          file(READ "/proc/cpuinfo" CPUINFO_CONTENTS)
          # Each "machine = NNNN" entry is reduced to "NNNN"; the rest of the
          # cpuinfo text is left in place. Quoted so that ';' or empty content
          # cannot split or drop arguments.
          string(REGEX REPLACE "machine[ \t\r\n]*=[ \t\r\n]*([0-9]+)" "\\1" S390X_M "${CPUINFO_CONTENTS}")

          # TODO: Separation to determine activation of VX/VXE/VXE2
          if ("${S390X_M}" MATCHES "8561|8562")
              message(STATUS "z15 target")
              list(APPEND ARCH_FLAGS -march=z15 -mtune=z15)
          elseif ("${S390X_M}" MATCHES "3931")
              message(STATUS "z16 target")
              list(APPEND ARCH_FLAGS -march=z16 -mtune=z16)
          else()
              message(STATUS "Unknown target")
              message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
              list(APPEND ARCH_FLAGS -march=native -mtune=native)
          endif()

          if (GGML_VXE)
              list(APPEND ARCH_FLAGS -mvx -mzvector)
          endif()
      else()
          message(STATUS "Unknown architecture")
      endif()

      if (GGML_CPU_AARCH64)
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_AARCH64)
      endif()

      # --- KleidiAI -------------------------------------------------------------

      if (GGML_CPU_KLEIDIAI)
          message(STATUS "Using KleidiAI optimized kernels if applicable")

          # Disable the KleidiAI tests
          set(KLEIDIAI_BUILD_TESTS  OFF)

          # Fetch KleidiAI sources:
          include(FetchContent)
          set(KLEIDIAI_COMMIT_TAG "v1.3.0")
          set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
          set(KLEIDIAI_ARCHIVE_MD5  "060bd2dc64642b091f461cc8dd7426d9")

          if (POLICY CMP0135)
              cmake_policy(SET CMP0135 NEW)
          endif()

          FetchContent_Declare(KleidiAI_Download
              URL ${KLEIDIAI_DOWNLOAD_URL}
              DOWNLOAD_EXTRACT_TIMESTAMP NEW
              URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5})

          FetchContent_MakeAvailable(KleidiAI_Download)
          FetchContent_GetProperties(KleidiAI_Download
              SOURCE_DIR  KLEIDIAI_SRC
              POPULATED   KLEIDIAI_POPULATED)

          if (NOT KLEIDIAI_POPULATED)
              message(FATAL_ERROR "KleidiAI source downloaded failed.")
          endif()

          # NOTE(review): this definition and the include_directories() call
          # below are directory-scoped, so they also reach other targets of
          # this directory. Kept as-is because other targets may rely on it;
          # consider target_compile_definitions()/target_include_directories().
          add_compile_definitions(GGML_USE_CPU_KLEIDIAI)

          # Remove kleidiai target after fetching it
          if (TARGET kleidiai)
              set_target_properties(kleidiai PROPERTIES EXCLUDE_FROM_ALL TRUE)
          endif()

          list(APPEND GGML_CPU_SOURCES
              ggml-cpu/kleidiai/kleidiai.cpp
              ggml-cpu/kleidiai/kernels.cpp
              ggml-cpu/kleidiai/kleidiai.h
              ggml-cpu/kleidiai/kernels.h
              )

          # KleidiAI
          include_directories(
              ${KLEIDIAI_SRC}/
              ${KLEIDIAI_SRC}/kai/
              ${KLEIDIAI_SRC}/kai/ukernels/
              ${KLEIDIAI_SRC}/kai/ukernels/matmul/
              ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/
              ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)

          # Arch flags used to decide which kernels to build: the flags computed
          # above, or, when there are none, the -march=... found in CMAKE_C_FLAGS.
          set(ARCH_FLAGS_TEMP "${ARCH_FLAGS}")
          if (NOT ARCH_FLAGS_TEMP)
              string(REGEX MATCH "-march=[^ ]+" ARCH_FLAGS_TEMP "${CMAKE_C_FLAGS}")
          endif()
          string(FIND "${ARCH_FLAGS_TEMP}" "+dotprod" DOTPROD_ENABLED)
          string(FIND "${ARCH_FLAGS_TEMP}" "+i8mm" I8MM_ENABLED)
          string(FIND "${ARCH_FLAGS_TEMP}" "+sme" SME_ENABLED)

          # Seed the per-source flags from ARCH_FLAGS_TEMP (not ARCH_FLAGS) so the
          # "+sve+sve2" suffix below always extends a real -march/-mcpu option,
          # also when the arch flags came from CMAKE_C_FLAGS.
          set(PRIVATE_ARCH_FLAGS ${ARCH_FLAGS_TEMP})

          set(KLEIDIAI_PACK_DIR   ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack)
          set(KLEIDIAI_MATMUL_DIR ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p)

          list(APPEND GGML_KLEIDIAI_SOURCES
              ${KLEIDIAI_PACK_DIR}/kai_lhs_quant_pack_qsi8d32p_f32.c
              ${KLEIDIAI_PACK_DIR}/kai_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon.c
              ${KLEIDIAI_PACK_DIR}/kai_lhs_quant_pack_qsi8d32p_f32_neon.c
              ${KLEIDIAI_PACK_DIR}/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)

          if (NOT DOTPROD_ENABLED EQUAL -1)
              list(APPEND GGML_KLEIDIAI_SOURCES
                  ${KLEIDIAI_MATMUL_DIR}/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c
                  ${KLEIDIAI_MATMUL_DIR}/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod.c
                  ${KLEIDIAI_MATMUL_DIR}/kai_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod.c)
          endif()

          if (NOT I8MM_ENABLED EQUAL -1)
              list(APPEND GGML_KLEIDIAI_SOURCES
                  ${KLEIDIAI_MATMUL_DIR}/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm.c)
          endif()

          if (NOT SME_ENABLED EQUAL -1)
              list(APPEND GGML_KLEIDIAI_SOURCES
                  ${KLEIDIAI_MATMUL_DIR}/kai_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa.c
                  ${KLEIDIAI_MATMUL_DIR}/kai_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot.c)
              # String concatenation: the suffix lands on the last list element.
              set(PRIVATE_ARCH_FLAGS "${PRIVATE_ARCH_FLAGS}+sve+sve2")
          endif()

          set_source_files_properties(${GGML_KLEIDIAI_SOURCES} PROPERTIES COMPILE_OPTIONS "${PRIVATE_ARCH_FLAGS}")
          list(APPEND GGML_CPU_SOURCES ${GGML_KLEIDIAI_SOURCES})
      endif()

      # --- Apply everything to the target ---------------------------------------

      message(STATUS "Adding CPU backend variant ${GGML_CPU_NAME}: ${ARCH_FLAGS} ${ARCH_DEFINITIONS}")
      target_sources(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_SOURCES})
      target_compile_options(${GGML_CPU_NAME} PRIVATE ${ARCH_FLAGS})
      target_compile_definitions(${GGML_CPU_NAME} PRIVATE ${ARCH_DEFINITIONS})

      if (GGML_BACKEND_DL)
          if (GGML_NATIVE)
              # the feature check relies on ARCH_DEFINITIONS, but it is not set with GGML_NATIVE
              message(FATAL_ERROR "GGML_NATIVE is not compatible with GGML_BACKEND_DL, consider using GGML_CPU_ALL_VARIANTS")
          endif()

          # The feature detection code is compiled as a separate target so that
          # it can be built without the architecture flags
          # Since multiple variants of the CPU backend may be included in the same
          # build, using set_source_files_properties() to set the arch flags is not possible
          set(GGML_CPU_FEATS_NAME ${GGML_CPU_NAME}-feats)
          add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/cpu-feats-x86.cpp)
          target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include)
          target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARCH_DEFINITIONS})
          target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED)
          set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
          target_link_libraries(${GGML_CPU_NAME} PRIVATE ${GGML_CPU_FEATS_NAME})
      endif()

      if (EMSCRIPTEN)
          set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128")
      endif()
  endfunction()