@@ -14413,13 +14413,29 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
     ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context;
 
     const vk_device& device = ggml_vk_get_device(ctx->device);
 
+    const bool uses_bda = (op->op == GGML_OP_IM2COL || op->op == GGML_OP_IM2COL_3D) &&
+                          device->shader_int64 && device->buffer_device_address;
+
+    auto const & tensor_size_supported = [&](size_t tensor_size) {
+        if (tensor_size > device->max_buffer_size) {
+            return false;
+        }
+        // For im2col shaders using BDA, maxStorageBufferRange limit doesn't apply.
+        // If shader64BitIndexing is enabled, maxStorageBufferRange limit doesn't apply.
+        if (!uses_bda && !device->shader_64b_indexing) {
+            if (tensor_size > device->properties.limits.maxStorageBufferRange) {
+                return false;
+            }
+        }
+        return true;
+    };
     // reject any tensors larger than the max buffer size
     for (int i = 0; i < GGML_MAX_SRC; i++) {
-        if (op->src[i] && ggml_nbytes(op->src[i]) > device->max_buffer_size) {
+        if (op->src[i] && !tensor_size_supported(ggml_nbytes(op->src[i]))) {
             return false;
         }
     }
-    if (ggml_nbytes(op) > device->max_buffer_size) {
+    if (!tensor_size_supported(ggml_nbytes(op))) {
         return false;
     }
 
     switch (op->op) {
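
The factored-out check can be read in isolation: a tensor must always fit in a single Vulkan buffer (`max_buffer_size`), and unless the shader escapes 32-bit storage-buffer addressing (im2col via buffer device address, or a device with 64-bit shader indexing), it must also fit within `maxStorageBufferRange`. Below is a minimal self-contained sketch of that logic, with the `vk_device` fields replaced by a hypothetical `DeviceLimits` struct and illustrative limit values; none of these names are part of the ggml API.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the vk_device fields used by the patch.
struct DeviceLimits {
    size_t   max_buffer_size;          // cf. device->max_buffer_size
    uint32_t max_storage_buffer_range; // cf. VkPhysicalDeviceLimits::maxStorageBufferRange
    bool     shader_64b_indexing;      // cf. device->shader_64b_indexing
    bool     uses_bda;                 // op runs a shader addressing memory via BDA
};

// Mirrors tensor_size_supported from the patch: every tensor must fit in one
// buffer, and only shaders limited to 32-bit descriptor indexing are also
// bound by maxStorageBufferRange.
static bool tensor_size_supported(const DeviceLimits & dev, size_t tensor_size) {
    if (tensor_size > dev.max_buffer_size) {
        return false;
    }
    if (!dev.uses_bda && !dev.shader_64b_indexing) {
        if (tensor_size > dev.max_storage_buffer_range) {
            return false;
        }
    }
    return true;
}

int main() {
    // Illustrative device: 4 GiB max buffer size, storage buffer range capped
    // at 128 MiB, no 64-bit indexing.
    DeviceLimits dev = { 4ull << 30, 128u << 20, false, false };

    size_t big = 512ull << 20; // a 512 MiB tensor
    printf("plain descriptor path: %d\n", tensor_size_supported(dev, big)); // 0: exceeds range limit

    dev.uses_bda = true; // im2col with BDA sidesteps maxStorageBufferRange
    printf("BDA path:              %d\n", tensor_size_supported(dev, big)); // 1: only max_buffer_size applies
}
```

The effect of the change is that large im2col operands, which previously passed only the `max_buffer_size` check, are now correctly gated on `maxStorageBufferRange` too, except on the BDA and 64-bit-indexing paths where that descriptor-range limit genuinely does not apply.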