@@ -89,6 +89,13 @@ int main(int argc, char ** argv) {
 
     common_init();
 
     params.embedding = true;
+
+    // utilize the full context
+    if (params.n_batch < params.n_ctx) {
+        LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);
+        params.n_batch = params.n_ctx;
+    }
+
     // For non-causal models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
@@ -134,7 +141,6 @@ int main(int argc, char ** argv) {
 
     // max batch size
     const uint64_t n_batch = params.n_batch;
-    GGML_ASSERT(params.n_batch >= params.n_ctx);
 
     // tokenize the prompts and trim
     std::vector<std::vector<int32_t>> inputs;
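
For reference, here is a minimal standalone sketch of the behavior the two hunks produce. The struct and helper below (sketch_params, normalize_embedding_params) are hypothetical stand-ins, not part of llama.cpp: a batch size smaller than the context size is now raised with a warning instead of being rejected by the removed GGML_ASSERT, and the ubatch size is then set equal to the batch size because, per the existing comment, non-causal models require batch size and ubatch size to match.

// Minimal standalone sketch (not the llama.cpp implementation) of the
// parameter handling that results from the diff above. sketch_params is a
// hypothetical stand-in for common_params.
#include <cstdio>

struct sketch_params {
    int n_ctx    = 4096; // context size
    int n_batch  = 2048; // logical batch size
    int n_ubatch = 512;  // physical (micro) batch size
};

// Raise the batch size so the full context fits in one batch (warning instead
// of the old hard assert), then pin the ubatch size to the batch size, as
// non-causal embedding models require.
static void normalize_embedding_params(sketch_params & p) {
    if (p.n_batch < p.n_ctx) {
        std::fprintf(stderr, "%s: setting batch size to %d\n", __func__, p.n_ctx);
        p.n_batch = p.n_ctx;
    }
    p.n_ubatch = p.n_batch;
}

int main() {
    sketch_params p;
    normalize_embedding_params(p);
    // Prints: n_ctx=4096 n_batch=4096 n_ubatch=4096
    std::printf("n_ctx=%d n_batch=%d n_ubatch=%d\n", p.n_ctx, p.n_batch, p.n_ubatch);
    return 0;
}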