|
|
@@ -26,7 +26,7 @@ static void im2col_kernel(const float * x, T * dst, int64_t batch_offset, int64_
|
|
|
|
|
|
// make each work-item deal with more elements since sycl global range can not exceed max int
|
|
|
for (int64_t i = global_id; i < pelements; i += (work_group_size * item_ct1.get_group_range(2))) {
|
|
|
- const int64_t ksize = OW * (KH > 1 ? KW : 1);
|
|
|
+ const int64_t ksize = OW * KH;
|
|
|
const int64_t kx = i / ksize;
|
|
|
const int64_t kd = kx * ksize;
|
|
|
const int64_t ky = (i - kd) / OW;
|