|
@@ -110,7 +110,7 @@ struct llama_file::impl {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- void read_raw(void * ptr, size_t len) const {
|
|
|
|
|
|
|
+ void read_raw(void * ptr, size_t len) {
|
|
|
size_t bytes_read = 0;
|
|
size_t bytes_read = 0;
|
|
|
while (bytes_read < len) {
|
|
while (bytes_read < len) {
|
|
|
size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
|
|
size_t chunk_size = std::min<size_t>(len - bytes_read, 64*1024*1024);
|
|
@@ -127,7 +127,7 @@ struct llama_file::impl {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- uint32_t read_u32() const {
|
|
|
|
|
|
|
+ uint32_t read_u32() {
|
|
|
uint32_t val;
|
|
uint32_t val;
|
|
|
read_raw(&val, sizeof(val));
|
|
read_raw(&val, sizeof(val));
|
|
|
return val;
|
|
return val;
|
|
@@ -154,8 +154,8 @@ struct llama_file::impl {
|
|
|
write_raw(&val, sizeof(val));
|
|
write_raw(&val, sizeof(val));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- void read_aligned_chunk(size_t offset, void * dest, size_t size) const {
|
|
|
|
|
- throw std::runtime_error("DirectIO is not implemented on Windows.");
|
|
|
|
|
|
|
+ bool has_direct_io() const {
|
|
|
|
|
+ return true;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
~impl() {
|
|
~impl() {
|
|
@@ -164,33 +164,45 @@ struct llama_file::impl {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
#else
|
|
#else
|
|
|
- impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) {
|
|
|
|
|
|
|
+ impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) : fname(fname) {
|
|
|
#ifdef __linux__
|
|
#ifdef __linux__
|
|
|
// Try unbuffered I/O for read only
|
|
// Try unbuffered I/O for read only
|
|
|
if (use_direct_io && std::strcmp(mode, "rb") == 0) {
|
|
if (use_direct_io && std::strcmp(mode, "rb") == 0) {
|
|
|
- fd = open(fname, O_RDONLY | O_DIRECT);
|
|
|
|
|
|
|
+ if (init_fd()) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ LLAMA_LOG_WARN("Failed to open file '%s' with error: %s. Falling back to buffered I/O",
|
|
|
|
|
+ fname, strerror(errno));
|
|
|
|
|
+ }
|
|
|
|
|
+#endif
|
|
|
|
|
+ init_fp(mode);
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- if (fd != -1) {
|
|
|
|
|
- struct stat file_stats{};
|
|
|
|
|
- fstat(fd, &file_stats);
|
|
|
|
|
|
|
+#ifdef __linux__
|
|
|
|
|
+ bool init_fd() {
|
|
|
|
|
+ fd = open(fname.c_str(), O_RDONLY | O_DIRECT);
|
|
|
|
|
|
|
|
- size = file_stats.st_size;
|
|
|
|
|
- alignment = file_stats.st_blksize;
|
|
|
|
|
|
|
+ if (fd != -1) {
|
|
|
|
|
+ struct stat file_stats{};
|
|
|
|
|
+ fstat(fd, &file_stats);
|
|
|
|
|
|
|
|
- off_t ret = lseek(fd, 0, SEEK_SET);
|
|
|
|
|
- if (ret == -1) {
|
|
|
|
|
- throw std::runtime_error(format("seek error: %s", strerror(errno)));
|
|
|
|
|
- }
|
|
|
|
|
- return;
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ size = file_stats.st_size;
|
|
|
|
|
+ alignment = file_stats.st_blksize;
|
|
|
|
|
|
|
|
- LLAMA_LOG_WARN("Failed to open model %s with error: %s. Falling back to buffered I/O",
|
|
|
|
|
- fname, strerror(errno));
|
|
|
|
|
|
|
+ off_t ret = lseek(fd, 0, SEEK_SET);
|
|
|
|
|
+ if (ret == -1) {
|
|
|
|
|
+ throw std::runtime_error(format("seek error: %s", strerror(errno)));
|
|
|
|
|
+ }
|
|
|
|
|
+ return true;
|
|
|
}
|
|
}
|
|
|
|
|
+ return false;
|
|
|
|
|
+ }
|
|
|
#endif
|
|
#endif
|
|
|
- fp = ggml_fopen(fname, mode);
|
|
|
|
|
|
|
+
|
|
|
|
|
+ void init_fp(const char * mode) {
|
|
|
|
|
+ fp = ggml_fopen(fname.c_str(), mode);
|
|
|
if (fp == NULL) {
|
|
if (fp == NULL) {
|
|
|
- throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
|
|
|
|
|
|
|
+ throw std::runtime_error(format("failed to open %s: %s", fname.c_str(), strerror(errno)));
|
|
|
}
|
|
}
|
|
|
seek(0, SEEK_END);
|
|
seek(0, SEEK_END);
|
|
|
size = tell();
|
|
size = tell();
|
|
@@ -226,7 +238,7 @@ struct llama_file::impl {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- void read_raw(void * ptr, size_t len) const {
|
|
|
|
|
|
|
+ void read_raw_unsafe(void * ptr, size_t len) {
|
|
|
if (len == 0) {
|
|
if (len == 0) {
|
|
|
return;
|
|
return;
|
|
|
}
|
|
}
|
|
@@ -249,6 +261,17 @@ struct llama_file::impl {
|
|
|
if (errno == EINTR) {
|
|
if (errno == EINTR) {
|
|
|
continue; // Interrupted by signal, retry
|
|
continue; // Interrupted by signal, retry
|
|
|
}
|
|
}
|
|
|
|
|
+ // Fallback to std::fread in case the DMA controller cannot access the buffer
|
|
|
|
|
+ if (errno == EFAULT) {
|
|
|
|
|
+ auto curr_off = tell();
|
|
|
|
|
+ close(fd);
|
|
|
|
|
+ fd = -1;
|
|
|
|
|
+ alignment = 1;
|
|
|
|
|
+ init_fp("rb");
|
|
|
|
|
+ seek(curr_off, SEEK_SET);
|
|
|
|
|
+ read_raw_unsafe(ptr, len);
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
|
throw std::runtime_error(format("read error: %s", strerror(errno)));
|
|
|
}
|
|
}
|
|
|
if (ret == 0) {
|
|
if (ret == 0) {
|
|
@@ -266,7 +289,8 @@ struct llama_file::impl {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- void read_aligned_chunk(size_t offset, void * dest, size_t size) const {
|
|
|
|
|
|
|
+ void read_aligned_chunk(void * dest, size_t size) {
|
|
|
|
|
+ size_t offset = tell();
|
|
|
off_t aligned_offset = offset & ~(alignment - 1);
|
|
off_t aligned_offset = offset & ~(alignment - 1);
|
|
|
off_t offset_from_alignment = offset - aligned_offset;
|
|
off_t offset_from_alignment = offset - aligned_offset;
|
|
|
size_t bytes_to_read = (offset_from_alignment + size + alignment - 1) & ~(alignment - 1);
|
|
size_t bytes_to_read = (offset_from_alignment + size + alignment - 1) & ~(alignment - 1);
|
|
@@ -283,13 +307,21 @@ struct llama_file::impl {
|
|
|
std::unique_ptr<void, aligned_buffer_deleter> buffer(raw_buffer);
|
|
std::unique_ptr<void, aligned_buffer_deleter> buffer(raw_buffer);
|
|
|
|
|
|
|
|
seek(aligned_offset, SEEK_SET);
|
|
seek(aligned_offset, SEEK_SET);
|
|
|
- read_raw(buffer.get(), bytes_to_read);
|
|
|
|
|
|
|
+ read_raw_unsafe(buffer.get(), bytes_to_read);
|
|
|
|
|
|
|
|
uintptr_t actual_data = reinterpret_cast<uintptr_t>(buffer.get()) + offset_from_alignment;
|
|
uintptr_t actual_data = reinterpret_cast<uintptr_t>(buffer.get()) + offset_from_alignment;
|
|
|
memcpy(dest, reinterpret_cast<void *>(actual_data), size);
|
|
memcpy(dest, reinterpret_cast<void *>(actual_data), size);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- uint32_t read_u32() const {
|
|
|
|
|
|
|
+ void read_raw(void * ptr, size_t len) {
|
|
|
|
|
+ if (has_direct_io()) {
|
|
|
|
|
+ read_aligned_chunk(ptr, len);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ read_raw_unsafe(ptr, len);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ uint32_t read_u32() {
|
|
|
uint32_t ret;
|
|
uint32_t ret;
|
|
|
read_raw(&ret, sizeof(ret));
|
|
read_raw(&ret, sizeof(ret));
|
|
|
return ret;
|
|
return ret;
|
|
@@ -310,6 +342,10 @@ struct llama_file::impl {
|
|
|
write_raw(&val, sizeof(val));
|
|
write_raw(&val, sizeof(val));
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ bool has_direct_io() const {
|
|
|
|
|
+ return fd != -1 && alignment > 1;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
~impl() {
|
|
~impl() {
|
|
|
if (fd != -1) {
|
|
if (fd != -1) {
|
|
|
close(fd);
|
|
close(fd);
|
|
@@ -318,17 +354,9 @@ struct llama_file::impl {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
int fd = -1;
|
|
int fd = -1;
|
|
|
|
|
+ std::string fname;
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
- void read_raw_at(void * ptr, size_t len, size_t offset) const {
|
|
|
|
|
- if (alignment != 1) {
|
|
|
|
|
- read_aligned_chunk(offset, ptr, len);
|
|
|
|
|
- } else {
|
|
|
|
|
- seek(offset, SEEK_SET);
|
|
|
|
|
- read_raw(ptr, len);
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
size_t read_alignment() const {
|
|
size_t read_alignment() const {
|
|
|
return alignment;
|
|
return alignment;
|
|
|
}
|
|
}
|
|
@@ -347,6 +375,7 @@ size_t llama_file::tell() const { return pimpl->tell(); }
|
|
|
size_t llama_file::size() const { return pimpl->size; }
|
|
size_t llama_file::size() const { return pimpl->size; }
|
|
|
|
|
|
|
|
size_t llama_file::read_alignment() const { return pimpl->read_alignment(); }
|
|
size_t llama_file::read_alignment() const { return pimpl->read_alignment(); }
|
|
|
|
|
+bool llama_file::has_direct_io() const { return pimpl->has_direct_io(); }
|
|
|
|
|
|
|
|
int llama_file::file_id() const {
|
|
int llama_file::file_id() const {
|
|
|
#ifdef _WIN32
|
|
#ifdef _WIN32
|
|
@@ -361,10 +390,14 @@ int llama_file::file_id() const {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
|
|
void llama_file::seek(size_t offset, int whence) const { pimpl->seek(offset, whence); }
|
|
|
-void llama_file::read_raw(void * ptr, size_t len) const { pimpl->read_raw(ptr, len); }
|
|
|
|
|
-void llama_file::read_raw_at(void * ptr, size_t len, size_t offset) const { pimpl->read_raw_at(ptr, len, offset); }
|
|
|
|
|
|
|
+void llama_file::read_raw(void * ptr, size_t len) { pimpl->read_raw(ptr, len); }
|
|
|
|
|
+#ifdef _WIN32
|
|
|
|
|
+void llama_file::read_raw_unsafe(void * ptr, size_t len) { pimpl->read_raw(ptr, len); }
|
|
|
|
|
+#else
|
|
|
|
|
+void llama_file::read_raw_unsafe(void * ptr, size_t len) { pimpl->read_raw_unsafe(ptr, len); }
|
|
|
|
|
+#endif
|
|
|
|
|
|
|
|
-uint32_t llama_file::read_u32() const { return pimpl->read_u32(); }
|
|
|
|
|
|
|
+uint32_t llama_file::read_u32() { return pimpl->read_u32(); }
|
|
|
|
|
|
|
|
void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
|
|
void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
|
|
|
void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
|
|
void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
|