|
@@ -124,7 +124,7 @@ static __global__ void __launch_bounds__(CUDA_CONCAT_BLOCK_SIZE)
|
|
|
uint64_t nb1,
|
|
uint64_t nb1,
|
|
|
uint64_t nb2,
|
|
uint64_t nb2,
|
|
|
uint64_t nb3){
|
|
uint64_t nb3){
|
|
|
- static_assert(dim >= 0 && dim <= 3);
|
|
|
|
|
|
|
+ static_assert(dim >= 0 && dim <= 3, "dim must be in [0, 3]");
|
|
|
|
|
|
|
|
const int64_t i3 = blockIdx.z;
|
|
const int64_t i3 = blockIdx.z;
|
|
|
const int64_t i2 = blockIdx.y;
|
|
const int64_t i2 = blockIdx.y;
|