|
|
@@ -149,7 +149,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
|
|
|
}
|
|
|
|
|
|
// amount of VRAM needed per batch size to hold temporary results
|
|
|
-// the values for 3b and 65b are not derived from testing but instead chosen conservatively
|
|
|
+// the value for 3b is not derived from testing but was instead chosen conservatively
|
|
|
static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
|
|
|
{
|
|
|
static std::map<e_model, size_t> k_sizes = {
|
|
|
@@ -157,14 +157,14 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
|
|
|
{ MODEL_7B, 512ull * kB },
|
|
|
{ MODEL_13B, 640ull * kB },
|
|
|
{ MODEL_30B, 768ull * kB },
|
|
|
- { MODEL_65B, 1536ull * kB },
|
|
|
- { MODEL_70B, 1536ull * kB }, // TODO (likely can be reduced)
|
|
|
+ { MODEL_65B, 1280ull * kB },
|
|
|
+ { MODEL_70B, 1280ull * kB },
|
|
|
};
|
|
|
return k_sizes;
|
|
|
}
|
|
|
|
|
|
// amount of VRAM needed per batch size and context to hold temporary results
|
|
|
-// the values for 3b and 65b are not derived from testing but instead chosen conservatively
|
|
|
+// the value for 3b is not derived from testing but was instead chosen conservatively
|
|
|
static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
|
|
|
{
|
|
|
static std::map<e_model, size_t> k_sizes = {
|
|
|
@@ -172,8 +172,8 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
|
|
|
{ MODEL_7B, 128ull },
|
|
|
{ MODEL_13B, 160ull },
|
|
|
{ MODEL_30B, 208ull },
|
|
|
- { MODEL_65B, 416ull },
|
|
|
- { MODEL_70B, 416ull }, // TODO (likely can be reduced)
|
|
|
+ { MODEL_65B, 256ull },
|
|
|
+ { MODEL_70B, 256ull },
|
|
|
};
|
|
|
return k_sizes;
|
|
|
}
|