@@ -156,6 +156,7 @@ struct vk_device_struct {
     vk::PhysicalDeviceProperties properties;
     std::string name;
     uint64_t max_memory_allocation_size;
+    uint64_t suballocation_block_size;
     bool fp16;
     bool pipeline_robustness;
     vk::Device device;
@@ -2269,6 +2270,7 @@ static vk_device ggml_vk_get_device(size_t idx) {

         device->physical_device.getProperties2(&props2);
         device->properties = props2.properties;
+        device->vendor_id = device->properties.vendorID;

         const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");

@@ -2280,7 +2282,20 @@ static vk_device ggml_vk_get_device(size_t idx) {
             device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
         }

-        device->vendor_id = device->properties.vendorID;
+        const char* GGML_VK_SUBALLOCATION_BLOCK_SIZE = getenv("GGML_VK_SUBALLOCATION_BLOCK_SIZE");
+
+        if (GGML_VK_SUBALLOCATION_BLOCK_SIZE != nullptr) {
+            device->suballocation_block_size = std::stoul(GGML_VK_SUBALLOCATION_BLOCK_SIZE);
+#if defined(_WIN32)
+        } else if (device->vendor_id == VK_VENDOR_ID_NVIDIA) {
+            // Limit batching of allocations to 1GB by default to avoid fragmentation issues
+            device->suballocation_block_size = 1024*1024*1024;
+#endif
+        } else {
+            device->suballocation_block_size = device->max_memory_allocation_size;
+        }
+        device->suballocation_block_size = std::min(device->suballocation_block_size, device->max_memory_allocation_size);
+
         device->subgroup_size = subgroup_props.subgroupSize;
         device->uma = device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;

         if (sm_builtins) {
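For readers who want to trace the precedence in isolation, here is a minimal standalone sketch of the block-size selection added above: an explicit GGML_VK_SUBALLOCATION_BLOCK_SIZE override wins, otherwise NVIDIA on Windows defaults to 1 GiB, otherwise the device's maximum allocation size is used, and the result is always clamped to that maximum. pick_block_size and the main driver are illustrative, not part of the patch; the sketch parses with std::stoull for 64-bit safety where the patch itself calls std::stoul.

    // Sketch only: mirrors the suballocation block size selection from
    // ggml_vk_get_device above. pick_block_size and the driver values
    // are hypothetical.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>
    #include <string>

    static uint64_t pick_block_size(const char * env, bool nvidia_on_windows, uint64_t max_alloc) {
        uint64_t block;
        if (env != nullptr) {
            block = std::stoull(env);      // explicit user override wins
        } else if (nvidia_on_windows) {
            block = 1024ull*1024*1024;     // 1 GiB default to limit fragmentation
        } else {
            block = max_alloc;             // otherwise one block per allocation
        }
        return std::min(block, max_alloc); // never exceed the device limit
    }

    int main() {
        // run e.g.: GGML_VK_SUBALLOCATION_BLOCK_SIZE=536870912 ./a.out
        const char * env = getenv("GGML_VK_SUBALLOCATION_BLOCK_SIZE");
        uint64_t block = pick_block_size(env, /*nvidia_on_windows=*/true, /*max_alloc=*/4ull*1024*1024*1024);
        printf("suballocation block size: %llu bytes\n", (unsigned long long) block);
        return 0;
    }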
@@ -7561,7 +7576,7 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type

 static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
     ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context;
-    return ctx->device->max_memory_allocation_size;
+    return ctx->device->suballocation_block_size;
 }

 static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
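Downstream, ggml sizes device buffers through the buffer type's get_max_size hook, so returning suballocation_block_size here caps each Vulkan buffer at the block size and lets the allocator split larger workloads across several buffers instead of one huge allocation. A quick way to observe the effective cap through the public API, assuming a build with the Vulkan backend enabled (ggml_backend_vk_buffer_type and ggml_backend_buft_get_max_size are existing ggml entry points; the program itself is illustrative):

    // Illustrative check of the effective buffer size cap on Vulkan device 0.
    #include <cstdio>
    #include "ggml-backend.h"
    #include "ggml-vulkan.h"

    int main() {
        // run e.g.: GGML_VK_SUBALLOCATION_BLOCK_SIZE=536870912 ./check-cap
        ggml_backend_buffer_type_t buft = ggml_backend_vk_buffer_type(0);
        printf("max buffer size: %zu bytes\n", ggml_backend_buft_get_max_size(buft));
        return 0;
    }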