|
|
@@ -105,10 +105,10 @@ int32_t ggml_cann_get_device() {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * @brief Get the value of the specified environment variable (name).
|
|
|
+ * @brief Get the value of the specified environment variable (name) as lowercase.
|
|
|
* if not empty, return a std::string object
|
|
|
*/
|
|
|
-std::optional<std::string> get_env(const std::string & name) {
|
|
|
+std::optional<std::string> get_env_as_lowercase(const std::string & name) {
|
|
|
const char * val = std::getenv(name.c_str());
|
|
|
if (!val) {
|
|
|
return std::nullopt;
|
|
|
@@ -259,7 +259,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool {
|
|
|
* @param device The device ID to associate with this buffer pool.
|
|
|
*/
|
|
|
explicit ggml_cann_pool_buf_prio(int device) : device(device) {
|
|
|
- disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
|
|
+ disable_clean = parse_bool(get_env_as_lowercase("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
@@ -452,7 +452,7 @@ struct ggml_cann_pool_buf : public ggml_cann_pool {
|
|
|
* @param device The device ID to associate with this buffer pool.
|
|
|
*/
|
|
|
explicit ggml_cann_pool_buf(int device) : device(device) {
|
|
|
- disable_clean = parse_bool(get_env("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
|
|
+ disable_clean = parse_bool(get_env_as_lowercase("GGML_CANN_DISABLE_BUF_POOL_CLEAN").value_or(""));
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
@@ -764,7 +764,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool {
|
|
|
* @return A unique pointer to the created CANN pool.
|
|
|
*/
|
|
|
std::unique_ptr<ggml_cann_pool> ggml_backend_cann_context::new_pool_for_device(int device) {
|
|
|
- std::string mem_pool_type = get_env("GGML_CANN_MEM_POOL").value_or("");
|
|
|
+ std::string mem_pool_type = get_env_as_lowercase("GGML_CANN_MEM_POOL").value_or("");
|
|
|
|
|
|
if (mem_pool_type == "prio") {
|
|
|
GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device);
|
|
|
@@ -1217,7 +1217,7 @@ static void ggml_backend_cann_buffer_set_tensor(ggml_backend_buffer_t buffer,
|
|
|
// Why aclrtSynchronizeDevice?
|
|
|
|
|
|
// Only check env once.
|
|
|
- static bool weight_to_nz = parse_bool(get_env("GGML_CANN_WEIGHT_NZ").value_or("on"));
|
|
|
+ static bool weight_to_nz = parse_bool(get_env_as_lowercase("GGML_CANN_WEIGHT_NZ").value_or("on"));
|
|
|
if (!need_transform(tensor->type)) {
|
|
|
ACL_CHECK(aclrtMemcpy((char *) tensor->data + offset, size, data, size, ACL_MEMCPY_HOST_TO_DEVICE));
|
|
|
if (weight_to_nz && is_matmul_weight((const ggml_tensor *) tensor)) {
|
|
|
@@ -1442,7 +1442,7 @@ static size_t ggml_backend_cann_buffer_type_get_alloc_size(ggml_backend_buffer_t
|
|
|
int64_t ne0 = tensor->ne[0];
|
|
|
|
|
|
// Only check env once.
|
|
|
- static bool weight_to_nz = parse_bool(get_env("GGML_CANN_WEIGHT_NZ").value_or("on"));
|
|
|
+ static bool weight_to_nz = parse_bool(get_env_as_lowercase("GGML_CANN_WEIGHT_NZ").value_or("on"));
|
|
|
|
|
|
// last line must bigger than 32, because every single op deal at
|
|
|
// least 32 bytes.
|
|
|
@@ -2136,7 +2136,7 @@ static void evaluate_and_capture_cann_graph(ggml_backend_cann_context * cann_ctx
|
|
|
#endif // USE_ACL_GRAPH
|
|
|
// Only perform the graph execution if CANN graphs are not enabled, or we are capturing the graph.
|
|
|
// With the use of CANN graphs, the execution will be performed by the graph launch.
|
|
|
- static bool opt_fusion = parse_bool(get_env("GGML_CANN_OPERATOR_FUSION").value_or(""));
|
|
|
+ static bool opt_fusion = parse_bool(get_env_as_lowercase("GGML_CANN_OPERATOR_FUSION").value_or(""));
|
|
|
|
|
|
if (!use_cann_graph || cann_graph_capture_required) {
|
|
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
|
@@ -2201,7 +2201,7 @@ static enum ggml_status ggml_backend_cann_graph_compute(ggml_backend_t backend,
|
|
|
#ifdef USE_ACL_GRAPH
|
|
|
bool use_cann_graph = true;
|
|
|
|
|
|
- static bool prefill_use_graph = parse_bool(get_env("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
|
|
|
+ static bool prefill_use_graph = parse_bool(get_env_as_lowercase("GGML_CANN_PREFILL_USE_GRAPH").value_or(""));
|
|
|
if (!prefill_use_graph) {
|
|
|
// Do not use acl_graph for prefill.
|
|
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|