@@ -5267,6 +5267,7 @@ struct ggml_tensor * ggml_concat(
     bool is_node = false;
 
     if (a->grad || b->grad) {
+        GGML_ABORT("fatal error"); // TODO: implement
         is_node = true;
     }
 
@@ -5388,6 +5389,7 @@ struct ggml_tensor * ggml_leaky_relu(
     bool is_node = false;
 
     if (!inplace && (a->grad)) {
+        GGML_ABORT("fatal error"); // TODO: not implemented
        is_node = true;
     }
 
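Note: both hunks above follow the convention ggml.c uses for ops whose backward pass is missing: if a gradient is requested during graph construction, abort immediately rather than build a graph that would silently produce wrong gradients. A minimal standalone sketch of the pattern, with hypothetical demo_* names standing in for the real ggml types and GGML_ABORT:

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for GGML_ABORT. */
    #define DEMO_ABORT(msg) do { fprintf(stderr, "%s\n", msg); abort(); } while (0)

    /* Hypothetical stand-in for ggml_tensor. */
    struct demo_tensor { void * grad; };

    /* The guard: an op with no backward implementation must not be
     * recorded as a differentiable node, so it aborts instead. */
    static int demo_build_op(const struct demo_tensor * a) {
        int is_node = 0;
        if (a->grad) {
            DEMO_ABORT("fatal error"); /* TODO: implement backward */
            is_node = 1;               /* unreachable until implemented */
        }
        return is_node;
    }

    int main(void) {
        struct demo_tensor t = { NULL };   /* no gradient requested */
        printf("is_node = %d\n", demo_build_op(&t));
        return 0;
    }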
@@ -5826,6 +5828,7 @@ static struct ggml_tensor * ggml_set_impl(
     // make a view of the destination
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    GGML_ASSERT(offset < (size_t)(1 << 30));
     int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
     ggml_set_op_params(result, params, sizeof(params));
 
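The new assert protects the narrowing on the very next line: offset arrives as a size_t but is packed into an int32_t params array, so an offset of 2^30 bytes or more could be silently truncated when read back. A standalone illustration of the hazard this bound rules out (hypothetical example values, not part of the patch):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        size_t offset = (size_t)3u << 30;   /* 3 GiB: fits a size_t, not an int32_t */
        int32_t packed = (int32_t)offset;   /* implementation-defined narrowing */
        printf("as size_t: %zu, as int32_t: %d\n", offset, packed);

        /* Mirrors GGML_ASSERT(offset < (size_t)(1 << 30)):
         * reject such offsets before they are packed. */
        assert(offset < (size_t)(1 << 30)); /* deliberately fires for 3 GiB */
        return 0;
    }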
@@ -6783,14 +6786,12 @@ struct ggml_tensor * ggml_rope_back(
     GGML_ASSERT(ggml_is_vector(b));
     GGML_ASSERT(b->type == GGML_TYPE_I32);
     GGML_ASSERT(a->ne[2] == b->ne[0]);
-    GGML_ASSERT(c == NULL && "freq factors not implemented yet");
-
-    GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
 
     bool is_node = false;
 
     if (a->grad) {
-        is_node = false; // TODO: implement backward
+        GGML_ASSERT(false && "backwards pass not implemented");
+        is_node = false;
     }
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
@@ -6808,6 +6809,7 @@ struct ggml_tensor * ggml_rope_back(
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
+    result->src[2] = c;
 
     return result;
 }
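With the two asserts gone, ggml_rope_back() now accepts the frequency-factors tensor c and records it in src[2], mirroring the forward ggml_rope path, so the operand is already wired up for when the backward pass lands. A minimal sketch of the positional src[] convention, using hypothetical demo_* types rather than the real ggml structs:

    #include <stdio.h>

    #define DEMO_MAX_SRC 3

    /* Simplified stand-in for ggml_tensor's src[] wiring. */
    struct demo_tensor {
        const char * name;
        struct demo_tensor * src[DEMO_MAX_SRC];
    };

    int main(void) {
        struct demo_tensor a = { "activations",  {0} };
        struct demo_tensor b = { "positions",    {0} };
        struct demo_tensor c = { "freq_factors", {0} };

        /* Operands are recorded positionally; a NULL src[2]
         * simply means "no frequency factors". */
        struct demo_tensor rope_back = { "rope_back", { &a, &b, &c } };

        for (int i = 0; i < DEMO_MAX_SRC; ++i) {
            printf("src[%d] = %s\n", i,
                   rope_back.src[i] ? rope_back.src[i]->name : "(null)");
        }
        return 0;
    }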
@@ -7361,6 +7363,11 @@ struct ggml_tensor * ggml_argsort(
         enum ggml_sort_order order) {
     bool is_node = false;
 
+    if (a->grad) {
+        GGML_ABORT("fatal error"); // TODO: not implemented
+        is_node = true;
+    }
+
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
 
     ggml_set_op_params_i32(result, 0, (int32_t) order);
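Same convention as in the ggml_concat and ggml_leaky_relu hunks above: ggml_argsort() has no backward implementation, so requesting a gradient aborts at graph-build time instead of silently yielding a wrong graph.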
@@ -10953,9 +10960,6 @@ static void ggml_compute_forward_sum_f32(
         return;
     }
 
-    assert(ggml_is_scalar(dst));
-
-
     assert(ggml_is_scalar(dst));
     assert(src0->nb[0] == sizeof(float));
 
@@ -18356,14 +18360,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src0->grad || src1->grad) {
                     GGML_ASSERT(src0->type == tensor->type);
                     GGML_ASSERT(tensor->grad->type == tensor->type);
-                    GGML_ASSERT(tensor->grad->type == src1->grad->type);
+                    GGML_ASSERT(!src1->grad || src1->grad->type == tensor->grad->type);
 
                     tensor_grad_view = ggml_view_4d(ctx,
-                        tensor->grad,
-                        src1->grad->ne[0],
-                        src1->grad->ne[1],
-                        src1->grad->ne[2],
-                        src1->grad->ne[3],
+                        tensor->grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
                         nb1, nb2, nb3, offset);
                 }
 
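The substantive fix in this hunk: in the backward pass for GGML_OP_SET, src1 does not have to require gradients, so src1->grad may be NULL. The type check becomes conditional, and the view dimensions are taken from src1->ne, which always exists and matches src1->grad->ne whenever that gradient is present. A NULL-safe sketch of the same pattern, using hypothetical demo_* structs in place of the ggml API:

    #include <assert.h>
    #include <stdio.h>

    struct demo_tensor {
        int                  type;    /* stand-in for enum ggml_type */
        long                 ne[4];   /* element counts per dimension */
        struct demo_tensor * grad;    /* NULL when no gradient is tracked */
    };

    /* NULL-safe variant of the patched assert: only compare types
     * when src1 actually carries a gradient. */
    static void demo_check(const struct demo_tensor * tensor_grad,
                           const struct demo_tensor * src1) {
        assert(!src1->grad || src1->grad->type == tensor_grad->type);

        /* Dimensions come from src1 itself, which always exists,
         * rather than from src1->grad, which may be NULL. */
        printf("view: %ld x %ld x %ld x %ld\n",
               src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3]);
    }

    int main(void) {
        struct demo_tensor grad = { 0, {4, 4, 1, 1}, NULL };
        struct demo_tensor src1 = { 0, {4, 2, 1, 1}, NULL }; /* no grad: still fine */
        demo_check(&grad, &src1);
        return 0;
    }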
@@ -18432,9 +18432,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
 
                     memcpy(&offset, tensor->op_params, sizeof(offset));
 
-                    size_t nb1 = tensor->nb[1];
-                    size_t nb2 = tensor->nb[2];
-                    size_t nb3 = tensor->nb[3];
+                    size_t nb1 = tensor->nb[1];
+                    size_t nb2 = tensor->nb[2];
+                    size_t nb3 = tensor->nb[3];
 
                     if (src0->type != src0->grad->type) {
                         // gradient is typically F32, but src0 could be other type