|
@@ -9108,6 +9108,7 @@ static void soft_max_f32(const float * x, const float * mask, float * dst, const
|
|
|
// find the sum of exps in the block
|
|
// find the sum of exps in the block
|
|
|
tmp = warp_reduce_sum(tmp, item_ct1);
|
|
tmp = warp_reduce_sum(tmp, item_ct1);
|
|
|
if (block_size > WARP_SIZE) {
|
|
if (block_size > WARP_SIZE) {
|
|
|
|
|
+ item_ct1.barrier(sycl::access::fence_space::local_space);
|
|
|
if (warp_id == 0) {
|
|
if (warp_id == 0) {
|
|
|
buf[lane_id] = 0.f;
|
|
buf[lane_id] = 0.f;
|
|
|
}
|
|
}
|