|
@@ -2478,6 +2478,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p
|
|
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
|
|
graph_node_properties->src_address[i] = node->src[i] ? node->src[i]->data : nullptr;
|
|
graph_node_properties->src_address[i] = node->src[i] ? node->src[i]->data : nullptr;
|
|
|
}
|
|
}
|
|
|
|
|
+ memcpy(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
|
|
static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
|
|
@@ -2509,6 +2510,12 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
|
|
|
return false;
|
|
return false;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+ if (node->op == GGML_OP_SCALE &&
|
|
|
|
|
+ memcmp(graph_node_properties->op_params, node->op_params, GGML_MAX_OP_PARAMS) != 0) {
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
return true;
|
|
return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -2720,7 +2727,9 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
|
|
|
// First call with null argument gets number of nodes in graph
|
|
// First call with null argument gets number of nodes in graph
|
|
|
CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, nullptr, &cuda_ctx->cuda_graph->num_nodes));
|
|
CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, nullptr, &cuda_ctx->cuda_graph->num_nodes));
|
|
|
// Subsequent call with non-null argument gets nodes
|
|
// Subsequent call with non-null argument gets nodes
|
|
|
|
|
+ cuda_ctx->cuda_graph->nodes.clear();
|
|
|
cuda_ctx->cuda_graph->nodes.resize(cuda_ctx->cuda_graph->num_nodes);
|
|
cuda_ctx->cuda_graph->nodes.resize(cuda_ctx->cuda_graph->num_nodes);
|
|
|
|
|
+ cuda_ctx->cuda_graph->params.clear();
|
|
|
cuda_ctx->cuda_graph->params.resize(cuda_ctx->cuda_graph->num_nodes);
|
|
cuda_ctx->cuda_graph->params.resize(cuda_ctx->cuda_graph->num_nodes);
|
|
|
if (cuda_ctx->cuda_graph->num_nodes > 0) {
|
|
if (cuda_ctx->cuda_graph->num_nodes > 0) {
|
|
|
CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, cuda_ctx->cuda_graph->nodes.data(), &cuda_ctx->cuda_graph->num_nodes));
|
|
CUDA_CHECK(cudaGraphGetNodes(cuda_ctx->cuda_graph->graph, cuda_ctx->cuda_graph->nodes.data(), &cuda_ctx->cuda_graph->num_nodes));
|