|
|
@@ -119,6 +119,55 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device)
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
+#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
|
|
+static int ggml_cuda_parse_id(char devName[]) {
|
|
|
+ // A list of possible Target IDs can be found under the rocclr/clr repo in device.cpp
|
|
|
+ // these values are not stable so this is susceptible to breakage
|
|
|
+ // https://github.com/ROCm/clr/blob/amd-staging/rocclr/device/device.cpp
|
|
|
+ int archMajor = 0x0;
|
|
|
+ int archMinor = 0x0;
|
|
|
+ int archNum = GGML_CUDA_CC_OFFSET_AMD;
|
|
|
+ int archLen = strlen(devName);
|
|
|
+ char archName[archLen + 1];
|
|
|
+
|
|
|
+ // strip leading 'gfx' while copying into our buffer
|
|
|
+ if (archLen > 3) {
|
|
|
+ strcpy(archName, &devName[3]);
|
|
|
+ archLen -= 3;
|
|
|
+ }
|
|
|
+
|
|
|
+ // trim trailing :xnack- or :sramecc- statuses
|
|
|
+ archLen = strcspn(archName, ":");
|
|
|
+ archName[archLen] = '\0';
|
|
|
+
|
|
|
+ // tease out the version information
|
|
|
+ if (archLen > 8) {
|
|
|
+ // versions labeled generic use '-' as delimiter
|
|
|
+ // strip the trailing "-generic" then iterate through what remains
|
|
|
+ if ((strstr(archName, "-generic"))) {
|
|
|
+ archName[archLen - 8] = '\0';
|
|
|
+ char * pch;
|
|
|
+ if ((pch = strtok(archName, "-"))) {
|
|
|
+ archMajor = (int)strtoul(pch, 0, 16);
|
|
|
+ if ((pch = strtok(NULL, "-"))) {
|
|
|
+ archMinor = 0x10 * (int)strtoul(pch, 0, 16);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else if (archLen >= 3) {
|
|
|
+ // last two digits should be the minor * 0x10 + stepping
|
|
|
+ archMinor = (int)strtoul(&archName[archLen - 2], 0, 16);
|
|
|
+ archName[archLen - 2] = '\0';
|
|
|
+
|
|
|
+ // only the major version remains
|
|
|
+ archMajor = (int)strtoul(archName, 0, 16);
|
|
|
+ }
|
|
|
+ archNum += archMajor * 0x100;
|
|
|
+ archNum += archMinor;
|
|
|
+ return archNum;
|
|
|
+}
|
|
|
+#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
|
|
+
|
|
|
static ggml_cuda_device_info ggml_cuda_init() {
|
|
|
#ifdef __HIP_PLATFORM_AMD__
|
|
|
// Workaround for a rocBLAS bug when using multiple graphics cards:
|
|
|
@@ -169,7 +218,6 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|
|
|
|
|
cudaDeviceProp prop;
|
|
|
CUDA_CHECK(cudaGetDeviceProperties(&prop, id));
|
|
|
- GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n", id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
|
|
|
|
|
info.default_tensor_split[id] = total_vram;
|
|
|
total_vram += prop.totalGlobalMem;
|
|
|
@@ -178,10 +226,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
|
|
|
info.devices[id].smpb = prop.sharedMemPerBlock;
|
|
|
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
|
|
info.devices[id].smpbo = prop.sharedMemPerBlock;
|
|
|
- info.devices[id].cc = 100*prop.major + 10*prop.minor + GGML_CUDA_CC_OFFSET_AMD;
|
|
|
+
|
|
|
+ info.devices[id].cc = ggml_cuda_parse_id(prop.gcnArchName);
|
|
|
+ if ((info.devices[id].cc & 0xff00) == 0x0) {
|
|
|
+ GGML_LOG_WARN("invalid architecture ID received for device %d %s: %s cc %d.%d\n",
|
|
|
+ id, prop.name, prop.gcnArchName, prop.major, prop.minor);
|
|
|
+
|
|
|
+ // Fallback to prop.major and prop.minor
|
|
|
+ if (prop.major > 0) {
|
|
|
+ info.devices[id].cc = GGML_CUDA_CC_OFFSET_AMD + prop.major * 0x100;
|
|
|
+ info.devices[id].cc += prop.minor * 0x10;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ GGML_LOG_INFO(" Device %d: %s, %s (0x%x), VMM: %s\n",
|
|
|
+ id, prop.name, prop.gcnArchName, info.devices[id].cc & 0xffff, device_vmm ? "yes" : "no");
|
|
|
#else
|
|
|
info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
|
|
|
info.devices[id].cc = 100*prop.major + 10*prop.minor;
|
|
|
+ GGML_LOG_INFO(" Device %d: %s, compute capability %d.%d, VMM: %s\n",
|
|
|
+ id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
|
|
|
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
|
|
|
}
|
|
|
|