Browse Source

llama : Extend fallback, fix fileno for dio file, exclude case that mmap uses dio file (#18887)

Julius Tischbein 1 week ago
parent
commit
287a33017b
2 changed files with 17 additions and 7 deletions
  1. 5 1
      src/llama-mmap.cpp
  2. 12 6
      src/llama-model-loader.cpp

+ 5 - 1
src/llama-mmap.cpp

@@ -265,7 +265,8 @@ struct llama_file::impl {
                         continue;  // Interrupted by signal, retry
                     }
                     // Fallback to std::fread in case the DMA controller cannot access the buffer
-                    if (errno == EFAULT) {
+                    if (errno == EFAULT || errno == EINVAL) {
+                        LLAMA_LOG_WARN("%s: Falling back to buffered IO due to %s\n", __func__, strerror(errno));
                         auto curr_off = tell();
                         close(fd);
                         fd = -1;
@@ -384,6 +385,9 @@ int llama_file::file_id() const {
 #ifdef _WIN32
     return _fileno(pimpl->fp);
 #else
+    if (pimpl->fd != -1) {
+        return pimpl->fd;
+    }
 #if defined(fileno)
     return fileno(pimpl->fp);
 #else

+ 12 - 6
src/llama-model-loader.cpp

@@ -539,12 +539,18 @@ llama_model_loader::llama_model_loader(
     files.emplace_back(new llama_file(fname.c_str(), "rb", use_direct_io));
     contexts.emplace_back(ctx);
 
-    use_direct_io = use_direct_io && files.back()->has_direct_io();
-
-    // Disable mmap in case Direct I/O is enabled and available
-    if (use_direct_io && use_mmap) {
-        use_mmap = false;
-        LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
+    if (use_mmap && use_direct_io) {
+        if (files.back()->has_direct_io()) {
+            // Disable mmap, as DirectIO is available
+            use_mmap = false;
+            LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
+        } else {
+            // Disable DirectIO and reopen file using std::fopen for mmap
+            use_direct_io = false;
+            files.pop_back();
+            files.emplace_back(new llama_file(fname.c_str(), "rb", false));
+            LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
+        }
     }
 
     // Save tensors data offset of the main file.