|
|
@@ -99,7 +99,7 @@ for p in range(n_parts):
|
|
|
fout.write(struct.pack("i", ftype))
|
|
|
|
|
|
# Is this correct??
|
|
|
- for i in range(32000):
|
|
|
+ for i in range(tokenizer.vocab_size()):
|
|
|
if tokenizer.is_unknown(i):
|
|
|
# "<unk>" token (translated as ??)
|
|
|
text = " \u2047 ".encode("utf-8")
|