
Commit c63e97f

Default max model len cleanup
1 parent 4acc85e commit c63e97f


olmocr/pipeline.py

Lines changed: 4 additions & 8 deletions
@@ -207,6 +207,7 @@ async def apost(url, json_data):
 async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path: str, page_num: int) -> PageResult:
     COMPLETION_URL = f"http://localhost:{BASE_SERVER_PORT}/v1/chat/completions"
     MAX_RETRIES = args.max_page_retries
+    MODEL_MAX_CONTEXT = 16384
     TEMPERATURE_BY_ATTEMPT = [0.1, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0]
     exponential_backoffs = 0
     local_anchor_text_len = args.target_anchor_text_len
@@ -245,10 +246,10 @@ async def process_page(args, worker_id: int, pdf_orig_path: str, pdf_local_path:
 
             base_response_data = json.loads(response_body)
 
-            if base_response_data["usage"]["total_tokens"] > args.model_max_context:
+            if base_response_data["usage"]["total_tokens"] > MODEL_MAX_CONTEXT:
                 local_anchor_text_len = max(1, local_anchor_text_len // 2)
                 logger.info(f"Reducing anchor text len to {local_anchor_text_len} for {pdf_orig_path}-{page_num}")
-                raise ValueError("Response exceeded model_max_context, cannot use this response")
+                raise ValueError(f"Response exceeded model_max_context of {MODEL_MAX_CONTEXT}, cannot use this response")
 
             if base_response_data["choices"][0]["finish_reason"] != "stop":
                 local_anchor_text_len = max(1, local_anchor_text_len // 2)
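
For context, this hunk sits inside process_page's retry loop: when the server reports total token usage above the (now hardcoded) MODEL_MAX_CONTEXT, the code halves local_anchor_text_len and raises so the loop retries the page with a smaller prompt. A minimal, self-contained sketch of that pattern, not olmocr's actual control flow (fetch_completion and process_page_sketch are hypothetical stand-ins for apost and process_page):

    import asyncio
    import json

    MODEL_MAX_CONTEXT = 16384  # hardcoded cap, as in this commit
    MAX_RETRIES = 8

    async def fetch_completion(anchor_text_len: int) -> str:
        # Hypothetical stand-in for apost(COMPLETION_URL, ...): reports token
        # usage that grows with the anchor text we sent.
        total_tokens = 12000 + anchor_text_len * 2
        return json.dumps({"usage": {"total_tokens": total_tokens},
                           "choices": [{"finish_reason": "stop"}]})

    async def process_page_sketch(target_anchor_text_len: int) -> dict:
        local_anchor_text_len = target_anchor_text_len
        for _ in range(MAX_RETRIES):
            try:
                data = json.loads(await fetch_completion(local_anchor_text_len))
                if data["usage"]["total_tokens"] > MODEL_MAX_CONTEXT:
                    # Response blew past the context window: shrink the
                    # anchor-text budget and retry instead of trusting it.
                    local_anchor_text_len = max(1, local_anchor_text_len // 2)
                    raise ValueError(f"Response exceeded model_max_context of {MODEL_MAX_CONTEXT}")
                return data
            except ValueError:
                continue
        raise RuntimeError(f"gave up after {MAX_RETRIES} attempts")

    print(asyncio.run(process_page_sketch(4000)))

Running the sketch, the first attempt reports 20000 tokens, the budget halves from 4000 to 2000, and the second attempt fits under the cap and is returned.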
@@ -1023,13 +1024,8 @@ async def main():
     )
 
     parser.add_argument("--gpu-memory-utilization", type=float, help="Fraction of VRAM vLLM may pre-allocate for KV-cache " "(passed through to vllm serve).")
-    parser.add_argument(
-        "--max_model_len",
-        type=int,
-        help="Upper bound (tokens) vLLM will allocate KV-cache for; " "passed through to vllm serve as --max-model-len.",
-    )
+    parser.add_argument("--max_model_len", type=int, default=16384, help="Upper bound (tokens) vLLM will allocate KV-cache for, lower if VLLM won't start")
 
-    parser.add_argument("--model_max_context", type=int, default="8192", help="Maximum context length that the model was fine tuned under")
     parser.add_argument("--target_longest_image_dim", type=int, help="Dimension on longest side to use for rendering the pdf pages", default=1288)
     parser.add_argument("--target_anchor_text_len", type=int, help="Maximum amount of anchor text to use (characters), not used for new models", default=-1)
     parser.add_argument("--guided_decoding", action="store_true", help="Enable guided decoding for model YAML type outputs")
