You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# Cap on the VRAM fraction vLLM may reserve; forwarded verbatim to `vllm serve`.
parser.add_argument(
    "--gpu-memory-utilization",
    type=float,
    help="Fraction of VRAM vLLM may pre-allocate for KV-cache "
    "(passed through to vllm serve).",
)
1026
-
parser.add_argument(
1027
-
"--max_model_len",
1028
-
type=int,
1029
-
help="Upper bound (tokens) vLLM will allocate KV-cache for; ""passed through to vllm serve as --max-model-len.",
1030
-
)
1027
+
# Maximum sequence length vLLM sizes its KV-cache for; forwarded to `vllm serve`
# as --max-model-len. Reducing it shrinks the cache if startup runs out of VRAM.
parser.add_argument(
    "--max_model_len",
    type=int,
    default=16384,
    help="Upper bound (tokens) vLLM will allocate KV-cache for, lower if VLLM won't start",
)
1031
1028
1032
-
# Context window the model was fine-tuned with (tokens).
# Fix: default was the string "8192" alongside type=int. argparse does re-parse
# string defaults through `type`, so behavior was accidentally correct, but an
# int literal is unambiguous and consistent with --max_model_len's default above.
parser.add_argument(
    "--model_max_context",
    type=int,
    default=8192,
    help="Maximum context length that the model was fine tuned under",
)
1033
1029
# Render resolution for PDF pages: the longest side is scaled to this many pixels.
parser.add_argument(
    "--target_longest_image_dim",
    type=int,
    default=1288,
    help="Dimension on longest side to use for rendering the pdf pages",
)
1034
1030
# Character budget for anchor text; -1 disables the limit. Ignored by newer models.
parser.add_argument(
    "--target_anchor_text_len",
    type=int,
    default=-1,
    help="Maximum amount of anchor text to use (characters), not used for new models",
)
1035
1031
# Boolean flag (default False): constrain model output to the YAML schema types.
parser.add_argument(
    "--guided_decoding",
    action="store_true",
    help="Enable guided decoding for model YAML type outputs",
)
0 commit comments