6 bulan lalu · 431b2c24f3
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1867,6 +1867,12 @@ extern "C" {
 
															     enum ggml_scale_mode {
														
 
															         GGML_SCALE_MODE_NEAREST  = 0,
														
 
															         GGML_SCALE_MODE_BILINEAR = 1,
														
 
															+
														
 
															+        GGML_SCALE_MODE_COUNT
														
 
															+    };
														
 
															+
														
 
															+    enum ggml_scale_flag {
														
 
															+        GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
														
 
															     };
														
 
															     // interpolate
														
@@ -1879,14 +1885,26 @@ extern "C" {
 
															     // interpolate
														
 
															     // interpolate scale to specified dimensions
														
 
															-    GGML_API struct ggml_tensor * ggml_upscale_ext(
														
 
															+    GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
														
 
															             struct ggml_context * ctx,
														
 
															             struct ggml_tensor  * a,
														
 
															             int                   ne0,
														
 
															             int                   ne1,
														
 
															             int                   ne2,
														
 
															             int                   ne3,
														
 
															-            enum ggml_scale_mode  mode);
														
 
															+            enum ggml_scale_mode  mode),
														
 
															+        "use ggml_interpolate instead");
														
 
															+
														
 
															+    // Up- or downsamples the input to the specified size.
														
 
															+    // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
														
 
															+    GGML_API struct ggml_tensor * ggml_interpolate(
														
 
															+            struct ggml_context * ctx,
														
 
															+            struct ggml_tensor  * a,
														
 
															+            int64_t               ne0,
														
 
															+            int64_t               ne1,
														
 
															+            int64_t               ne2,
														
 
															+            int64_t               ne3,
														
 
															+            uint32_t              mode); // ggml_scale_mode [ | ggml_scale_flag...]
														
 
															     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
														
 
															     GGML_API struct ggml_tensor * ggml_pad(
														
--- a/ggml/src/ggml-cpu/ops.cpp
+++ b/ggml/src/ggml-cpu/ops.cpp
@@ -7276,12 +7276,13 @@ static void ggml_compute_forward_upscale_f32(
 
															     GGML_TENSOR_UNARY_OP_LOCALS
														
 
															-    const float sf0 = (float)ne0/src0->ne[0];
														
 
															-    const float sf1 = (float)ne1/src0->ne[1];
														
 
															-    const float sf2 = (float)ne2/src0->ne[2];
														
 
															-    const float sf3 = (float)ne3/src0->ne[3];
														
 
															+    float sf0 = (float)ne0/src0->ne[0];
														
 
															+    float sf1 = (float)ne1/src0->ne[1];
														
 
															+    float sf2 = (float)ne2/src0->ne[2];
														
 
															+    float sf3 = (float)ne3/src0->ne[3];
														
 
															-    const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0);
														
 
															+    const int32_t mode_flags = ggml_get_op_params_i32(dst, 0);
														
 
															+    const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF);
														
 
															     if (mode == GGML_SCALE_MODE_NEAREST) {
														
 
															         for (int64_t i3 = 0; i3 < ne3; i3++) {
														
@@ -7302,8 +7303,12 @@ static void ggml_compute_forward_upscale_f32(
 
															             }
														
 
															         }
														
 
															     } else if (mode == GGML_SCALE_MODE_BILINEAR) {
														
 
															-        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
														
 
															-        const float pixel_offset = 0.5f;
														
 
															+        float pixel_offset = 0.5f;
														
 
															+        if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) {
														
 
															+            pixel_offset = 0.0f;
														
 
															+            sf0 = (float)(ne0 - 1) / (src0->ne[0] - 1);
														
 
															+            sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1);
														
 
															+        }
														
 
															         for (int64_t i3 = 0; i3 < ne3; i3++) {
														
 
															             const int64_t i03 = i3 / sf3;
														
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4447,24 +4447,21 @@ struct ggml_tensor * ggml_pool_2d_back(
 
															     return result;
														
 
															 }
														
 
															-// ggml_upscale
														
 
															+// ggml_upscale / ggml_interpolate
														
 
															-static struct ggml_tensor * ggml_upscale_impl(
														
 
															+static struct ggml_tensor * ggml_interpolate_impl(
														
 
															         struct ggml_context * ctx,
														
 
															         struct ggml_tensor  * a,
														
 
															-        int                   ne0,
														
 
															-        int                   ne1,
														
 
															-        int                   ne2,
														
 
															-        int                   ne3,
														
 
															-        enum ggml_scale_mode  mode) {
														
 
															-    GGML_ASSERT(a->ne[0] <= ne0);
														
 
															-    GGML_ASSERT(a->ne[1] <= ne1);
														
 
															-    GGML_ASSERT(a->ne[2] <= ne2);
														
 
															-    GGML_ASSERT(a->ne[3] <= ne3);
														
 
															-
														
 
															+        int64_t               ne0,
														
 
															+        int64_t               ne1,
														
 
															+        int64_t               ne2,
														
 
															+        int64_t               ne3,
														
 
															+        uint32_t              mode) {
														
 
															+    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
														
 
															+    
														
 
															     struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
														
 
															-    ggml_set_op_params_i32(result, 0, mode);
														
 
															+    ggml_set_op_params_i32(result, 0, (int32_t)mode);
														
 
															     result->op     = GGML_OP_UPSCALE;
														
 
															     result->src[0] = a;
														
@@ -4477,7 +4474,8 @@ struct ggml_tensor * ggml_upscale(
 
															         struct ggml_tensor  * a,
														
 
															         int                   scale_factor,
														
 
															         enum ggml_scale_mode  mode) {
														
 
															-    return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
														
 
															+    GGML_ASSERT(scale_factor > 1);
														
 
															+    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
														
 
															 }
														
 
															 struct ggml_tensor * ggml_upscale_ext(
														
@@ -4488,7 +4486,18 @@ struct ggml_tensor * ggml_upscale_ext(
 
															         int                   ne2,
														
 
															         int                   ne3,
														
 
															         enum ggml_scale_mode  mode) {
														
 
															-    return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
														
 
															+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
														
 
															+}
														
 
															+
														
 
															+struct ggml_tensor * ggml_interpolate(
														
 
															+        struct ggml_context * ctx,
														
 
															+        struct ggml_tensor  * a,
														
 
															+        int64_t               ne0,
														
 
															+        int64_t               ne1,
														
 
															+        int64_t               ne2,
														
 
															+        int64_t               ne3,
														
 
															+        uint32_t              mode) {
														
 
															+    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
														
 
															 }
														
 
															 // ggml_pad
														
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -3296,28 +3296,28 @@ struct test_upscale : public test_case {
 
															     }
														
 
															 };
														
 
															-// GGML_OP_UPSCALE (ext)
														
 
															-struct test_upscale_ext : public test_case {
														
 
															+// GGML_OP_UPSCALE (via ggml_interpolate)
														
 
															+struct test_interpolate : public test_case {
														
 
															     const ggml_type type;
														
 
															     const std::array<int64_t, 4> ne;
														
 
															     const std::array<int64_t, 4> ne_tgt;
														
 
															-    const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
														
 
															+    const uint32_t mode = GGML_SCALE_MODE_NEAREST;
														
 
															     std::string vars() override {
														
 
															         return VARS_TO_STR4(type, ne, ne_tgt, mode);
														
 
															     }
														
 
															-    test_upscale_ext(ggml_type type = GGML_TYPE_F32,
														
 
															+    test_interpolate(ggml_type type = GGML_TYPE_F32,
														
 
															             std::array<int64_t, 4> ne     = {2, 5,  7, 11},
														
 
															             std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
														
 
															-            ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
														
 
															+            uint32_t mode = GGML_SCALE_MODE_NEAREST)
														
 
															         : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
														
 
															     ggml_tensor * build_graph(ggml_context * ctx) override {
														
 
															         ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
														
 
															         ggml_set_name(a, "a");
														
 
															-        ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
														
 
															+        ggml_tensor * out = ggml_interpolate(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode);
														
 
															         ggml_set_name(out, "out");
														
 
															         return out;
														
@@ -4799,8 +4799,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
 
															     for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) {
														
 
															         test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
														
 
															         test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
														
 
															-        test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5,  7, 11}, {5, 7, 11, 13}, mode));
														
 
															+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5,  7, 11}, {5, 7, 11, 13}, mode));
														
 
															+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5,  7, 11}, mode));
														
 
															     }
														
 
															+    test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5,  7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS));
														
 
															     test_cases.emplace_back(new test_sum());
														
 
															     test_cases.emplace_back(new test_sum_rows());