#!/usr/bin/env python3
import argparse
import os
import subprocess
import sys

import yaml
  7. CLI_ARGS_MAIN_PERPLEXITY = [
  8. "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
  9. "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
  10. "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix", "instruct",
  11. "interactive", "interactive-first", "keep", "logdir", "logit-bias", "lora", "lora-base",
  12. "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",
  13. "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",
  14. "np-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt",
  15. "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n",
  16. "repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed",
  17. "simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical",
  18. "verbose-prompt"
  19. ]
  20. CLI_ARGS_LLAMA_BENCH = [
  21. "batch-size", "memory-f32", "low-vram", "model", "mul-mat-q", "n-gen", "n-gpu-layers",
  22. "n-prompt", "output", "repetitions", "tensor-split", "threads", "verbose"
  23. ]
  24. CLI_ARGS_SERVER = [
  25. "alias", "batch-size", "ctx-size", "embedding", "host", "memory-f32", "lora", "lora-base",
  26. "low-vram", "main-gpu", "mlock", "model", "n-gpu-layers", "n-probs", "no-mmap", "no-mul-mat-q",
  27. "numa", "path", "port", "rope-freq-base", "timeout", "rope-freq-scale", "tensor-split",
  28. "threads", "verbose"
  29. ]
  30. description = """Run llama.cpp binaries with presets from YAML file(s).
  31. To specify which binary should be run, specify the "binary" property (main, perplexity, llama-bench, and server are supported).
  32. To get a preset file template, run a llama.cpp binary with the "--logdir" CLI argument.
  33. Formatting considerations:
  34. - The YAML property names are the same as the CLI argument names of the corresponding binary.
  35. - Properties must use the long name of their corresponding llama.cpp CLI arguments.
  36. - Like the llama.cpp binaries the property names do not differentiate between hyphens and underscores.
  37. - Flags must be defined as "<PROPERTY_NAME>: true" to be effective.
  38. - To define the logit_bias property, the expected format is "<TOKEN_ID>: <BIAS>" in the "logit_bias" namespace.
  39. - To define multiple "reverse_prompt" properties simultaneously the expected format is a list of strings.
  40. - To define a tensor split, pass a list of floats.
  41. """
  42. usage = "run_with_preset.py [-h] [yaml_files ...] [--<ARG_NAME> <ARG_VALUE> ...]"
  43. epilog = (" --<ARG_NAME> specify additional CLI ars to be passed to the binary (override all preset files). "
  44. "Unknown args will be ignored.")
  45. parser = argparse.ArgumentParser(
  46. description=description, usage=usage, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter)
  47. parser.add_argument("-bin", "--binary", help="The binary to run.")
  48. parser.add_argument("yaml_files", nargs="*",
  49. help="Arbitrary number of YAML files from which to read preset values. "
  50. "If two files specify the same values the later one will be used.")
  51. known_args, unknown_args = parser.parse_known_args()
  52. if not known_args.yaml_files and not unknown_args:
  53. parser.print_help()
  54. sys.exit(0)
  55. props = dict()
  56. for yaml_file in known_args.yaml_files:
  57. with open(yaml_file, "r") as f:
  58. props.update(yaml.load(f, yaml.SafeLoader))
  59. props = {prop.replace("_", "-"): val for prop, val in props.items()}
  60. binary = props.pop("binary", "main")
  61. if known_args.binary:
  62. binary = known_args.binary
  63. if os.path.exists(f"./{binary}"):
  64. binary = f"./{binary}"
  65. if binary.lower().endswith("main") or binary.lower().endswith("perplexity"):
  66. cli_args = CLI_ARGS_MAIN_PERPLEXITY
  67. elif binary.lower().endswith("llama-bench"):
  68. cli_args = CLI_ARGS_LLAMA_BENCH
  69. elif binary.lower().endswith("server"):
  70. cli_args = CLI_ARGS_SERVER
  71. else:
  72. print(f"Unknown binary: {binary}")
  73. sys.exit(1)
  74. command_list = [binary]
  75. for cli_arg in cli_args:
  76. value = props.pop(cli_arg, None)
  77. if not value or value == -1:
  78. continue
  79. if cli_arg == "logit-bias":
  80. for token, bias in value.items():
  81. command_list.append("--logit-bias")
  82. command_list.append(f"{token}{bias:+}")
  83. continue
  84. if cli_arg == "reverse-prompt" and not isinstance(value, str):
  85. for rp in value:
  86. command_list.append("--reverse-prompt")
  87. command_list.append(str(rp))
  88. continue
  89. command_list.append(f"--{cli_arg}")
  90. if cli_arg == "tensor-split":
  91. command_list.append(",".join([str(v) for v in value]))
  92. continue
  93. value = str(value)
  94. if value != "True":
  95. command_list.append(str(value))
  96. num_unused = len(props)
  97. if num_unused > 10:
  98. print(f"The preset file contained a total of {num_unused} unused properties.")
  99. elif num_unused > 0:
  100. print("The preset file contained the following unused properties:")
  101. for prop, value in props.items():
  102. print(f" {prop}: {value}")
  103. command_list += unknown_args
  104. sp = subprocess.Popen(command_list)
  105. while sp.returncode is None:
  106. try:
  107. sp.wait()
  108. except KeyboardInterrupt:
  109. pass
  110. sys.exit(sp.returncode)