-
Notifications
You must be signed in to change notification settings - Fork 54
Add Anthropic thinking params and responses support #214
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
f48f5a6
dce6a69
fbc05e8
19cae64
c5b50e6
342a4ea
fdb87e9
f85073b
3c7bc81
bebacfd
d43314b
ea0bacf
cbb8ec0
383514b
5250106
de802e3
3084245
b311dae
0914e78
7d5a0d2
0e4de2d
c1876db
67a5bdc
4a9eb62
3beba3b
d8eae39
7ff7188
33677b1
c6ba935
b10694a
1bd2e52
0e0ebb3
c5858dd
3562ca7
517ecf0
080535d
fdb4625
bd0b391
7106774
92ac83c
f14807e
fba1dff
7d19720
2f11f2a
f68bb95
45fb63a
4f68885
81c6968
6e661b4
c3915df
c24f911
0d570b3
0f68f3b
5755308
a0277d7
94e0a63
ba02e9d
8cd6c41
b9ddec6
56b38df
3e42887
7438e82
4981ced
c95c56a
19a2510
fcaf564
f9cd4eb
35fd2c0
b01da09
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
source_up_if_exists | ||
dotenv_if_exists | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,10 +49,79 @@ export const modelParamToModelParam: { | |
stream_options: null, | ||
parallel_tool_calls: null, | ||
response_format: null, | ||
reasoning_effort: null, | ||
reasoning_effort: "reasoning_effort", | ||
stop: null, | ||
}; | ||
|
||
const effortToBudgetMultiplier = { | ||
ibolmo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
low: 0.2, | ||
medium: 0.5, | ||
high: 0.8, | ||
} as const; | ||
|
||
const getBudgetMultiplier = (effort: keyof typeof effortToBudgetMultiplier) => { | ||
ibolmo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return effortToBudgetMultiplier[effort] || effortToBudgetMultiplier.low; | ||
}; | ||
|
||
export const modelParamMappers: { | ||
[name in ModelFormat]?: { | ||
[param: string]: (params: any) => Record<string, unknown>; | ||
}; | ||
} = { | ||
anthropic: { | ||
reasoning_effort: ({ | ||
reasoning_effort, | ||
max_tokens, | ||
max_completion_tokens, | ||
temperature: _, | ||
...params | ||
}) => { | ||
// Max tokens are inclusive of budget. If the max tokens are too low (below 1024), then the API will raise an exception. | ||
const maxTokens = Math.max( | ||
max_completion_tokens || max_tokens || 0, | ||
1024 / effortToBudgetMultiplier.low, | ||
); | ||
|
||
const budget = getBudgetMultiplier(reasoning_effort || "low") * maxTokens; | ||
|
||
return { | ||
...params, | ||
max_tokens: maxTokens, | ||
// must be set when using thinking | ||
temperature: 1, | ||
thinking: { | ||
budget_tokens: budget, | ||
type: "enabled", | ||
}, | ||
}; | ||
}, | ||
}, | ||
google: { | ||
reasoning_effort: ({ | ||
reasoning_effort, | ||
max_tokens, | ||
max_completion_tokens, | ||
...params | ||
}) => { | ||
const maxTokens = Math.max( | ||
max_completion_tokens || max_tokens || 0, | ||
1024 / effortToBudgetMultiplier.low, | ||
); | ||
|
||
const budget = getBudgetMultiplier(reasoning_effort || "low") * maxTokens; | ||
|
||
return { | ||
...params, | ||
thinkingConfig: { | ||
thinkingBudget: budget, | ||
includeThoughts: true, | ||
}, | ||
maxOutputTokens: maxTokens, | ||
}; | ||
}, | ||
}, | ||
}; | ||
|
||
export const sliderSpecs: { | ||
// min, max, step, required | ||
[name: string]: [number, number, number, boolean]; | ||
|
@@ -82,13 +151,15 @@ export const defaultModelParamSettings: { | |
response_format: null, | ||
stop: undefined, | ||
use_cache: true, | ||
reasoning_effort: "medium", | ||
}, | ||
anthropic: { | ||
temperature: undefined, | ||
max_tokens: undefined, | ||
top_p: 0.7, | ||
top_k: 5, | ||
use_cache: true, | ||
reasoning_effort: "medium", | ||
}, | ||
google: { | ||
temperature: undefined, | ||
|
@@ -121,6 +192,17 @@ export const modelProviderHasTools: { | |
converse: true, | ||
}; | ||
|
||
export const modelProviderHasReasoning: { | ||
[name in ModelFormat]?: RegExp; | ||
} = { | ||
openai: /^o[1-4]/i, | ||
anthropic: /^claude-3\.7/i, | ||
google: /gemini-2.0-flash$|gemini-2.5/i, | ||
js: undefined, | ||
window: undefined, | ||
converse: undefined, | ||
}; | ||
|
||
export const DefaultEndpointTypes: { | ||
[name in ModelFormat]: ModelEndpointType[]; | ||
} = { | ||
|
@@ -427,23 +509,29 @@ export function translateParams( | |
toProvider: ModelFormat, | ||
params: Record<string, unknown>, | ||
): Record<string, unknown> { | ||
const translatedParams: Record<string, unknown> = {}; | ||
let translatedParams: Record<string, unknown> = {}; | ||
for (const [k, v] of Object.entries(params || {})) { | ||
const safeValue = v ?? undefined; // Don't propagate "null" along | ||
const mapper = modelParamMappers[toProvider]?.[k]; | ||
if (mapper) { | ||
translatedParams = mapper(translatedParams); | ||
continue; | ||
} | ||
|
||
const translatedKey = modelParamToModelParam[k as keyof ModelParams] as | ||
| keyof ModelParams | ||
| undefined | ||
| null; | ||
|
||
if (translatedKey === null) { | ||
continue; | ||
} else if ( | ||
translatedKey !== undefined && | ||
defaultModelParamSettings[toProvider][translatedKey] !== undefined | ||
) { | ||
translatedParams[translatedKey] = safeValue; | ||
} else { | ||
translatedParams[k] = safeValue; | ||
} | ||
|
||
const hasDefaultParam = | ||
translatedKey !== undefined && | ||
defaultModelParamSettings[toProvider][translatedKey] !== undefined; | ||
|
||
translatedParams[hasDefaultParam ? translatedKey : k] = safeValue; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when would we want to set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah I haven't checked. I honestly think we need to remove this section and instead just do the translate params mappers that I started. the idea is that you get openai as input and get anthropic params as the output. This way we centralize that kind of translation to a spot vs. right now it's a pseudo translation and then have each provider decide how to grab the data from openai-like params. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes but right now this PR is doing something in between. i think we should revert this bit so we don't regress behavior and figure out the right thing to do in another PR |
||
} | ||
|
||
return translatedParams; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,7 +44,8 @@ export const ModelSchema = z.object({ | |
input_cost_per_mil_tokens: z.number().nullish(), | ||
output_cost_per_mil_tokens: z.number().nullish(), | ||
displayName: z.string().nullish(), | ||
o1_like: z.boolean().nullish(), | ||
o1_like: z.boolean().nullish().describe('DEPRECATED use "reasoning" instead'), | ||
choochootrain marked this conversation as resolved.
Show resolved
Hide resolved
|
||
reasoning: z.boolean().nullish(), | ||
experimental: z.boolean().nullish(), | ||
deprecated: z.boolean().nullish(), | ||
parent: z.string().nullish(), | ||
|
@@ -159,15 +160,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: true, | ||
input_cost_per_mil_tokens: 1.1, | ||
output_cost_per_mil_tokens: 4.4, | ||
o1_like: true, | ||
reasoning: true, | ||
}, | ||
"o4-mini-2025-04-16": { | ||
format: "openai", | ||
flavor: "chat", | ||
multimodal: true, | ||
input_cost_per_mil_tokens: 1.1, | ||
output_cost_per_mil_tokens: 4.4, | ||
o1_like: true, | ||
reasoning: true, | ||
parent: "o4-mini", | ||
}, | ||
"o3-mini": { | ||
|
@@ -176,15 +177,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: true, | ||
input_cost_per_mil_tokens: 1.1, | ||
output_cost_per_mil_tokens: 4.4, | ||
o1_like: true, | ||
reasoning: true, | ||
}, | ||
"o3-mini-2025-01-31": { | ||
format: "openai", | ||
flavor: "chat", | ||
multimodal: true, | ||
input_cost_per_mil_tokens: 1.1, | ||
output_cost_per_mil_tokens: 4.4, | ||
o1_like: true, | ||
reasoning: true, | ||
parent: "o3-mini", | ||
}, | ||
o3: { | ||
|
@@ -193,15 +194,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: true, | ||
input_cost_per_mil_tokens: 10.0, | ||
output_cost_per_mil_tokens: 40, | ||
o1_like: true, | ||
reasoning: true, | ||
}, | ||
"o3-2025-04-16": { | ||
format: "openai", | ||
flavor: "chat", | ||
multimodal: true, | ||
input_cost_per_mil_tokens: 10.0, | ||
output_cost_per_mil_tokens: 40, | ||
o1_like: true, | ||
reasoning: true, | ||
parent: "o3", | ||
}, | ||
o1: { | ||
|
@@ -210,15 +211,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: true, | ||
input_cost_per_mil_tokens: 15.0, | ||
output_cost_per_mil_tokens: 60, | ||
o1_like: true, | ||
reasoning: true, | ||
}, | ||
"o1-2024-12-17": { | ||
format: "openai", | ||
flavor: "chat", | ||
multimodal: true, | ||
input_cost_per_mil_tokens: 15.0, | ||
output_cost_per_mil_tokens: 60, | ||
o1_like: true, | ||
reasoning: true, | ||
parent: "o1", | ||
}, | ||
"o1-mini": { | ||
|
@@ -227,15 +228,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: false, | ||
input_cost_per_mil_tokens: 3.0, | ||
output_cost_per_mil_tokens: 12.0, | ||
o1_like: true, | ||
reasoning: true, | ||
}, | ||
"o1-mini-2024-09-12": { | ||
format: "openai", | ||
flavor: "chat", | ||
multimodal: false, | ||
input_cost_per_mil_tokens: 3.0, | ||
output_cost_per_mil_tokens: 12.0, | ||
o1_like: true, | ||
reasoning: true, | ||
parent: "o1-mini", | ||
}, | ||
"o1-pro": { | ||
|
@@ -244,15 +245,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: true, | ||
input_cost_per_mil_tokens: 150, | ||
output_cost_per_mil_tokens: 600, | ||
o1_like: true, | ||
reasoning: true, | ||
}, | ||
"o1-pro-2025-03-19": { | ||
format: "openai", | ||
flavor: "chat", | ||
multimodal: true, | ||
input_cost_per_mil_tokens: 150, | ||
output_cost_per_mil_tokens: 600, | ||
o1_like: true, | ||
reasoning: true, | ||
parent: "o1-pro", | ||
}, | ||
"chatgpt-4o-latest": { | ||
|
@@ -349,8 +350,8 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: false, | ||
input_cost_per_mil_tokens: 15.0, | ||
output_cost_per_mil_tokens: 60, | ||
o1_like: true, | ||
experimental: true, | ||
reasoning: true, | ||
parent: "o1", | ||
}, | ||
"o1-preview-2024-09-12": { | ||
|
@@ -359,8 +360,8 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
multimodal: false, | ||
input_cost_per_mil_tokens: 15.0, | ||
output_cost_per_mil_tokens: 60.0, | ||
o1_like: true, | ||
experimental: true, | ||
reasoning: true, | ||
parent: "o1", | ||
}, | ||
"gpt-4o-search-preview": { | ||
|
@@ -547,6 +548,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
input_cost_per_mil_tokens: 3, | ||
output_cost_per_mil_tokens: 15, | ||
displayName: "Claude 3.7 Sonnet", | ||
reasoning: true, | ||
}, | ||
"claude-3-7-sonnet-20250219": { | ||
format: "anthropic", | ||
|
@@ -1917,6 +1919,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
output_cost_per_mil_tokens: 0.6, | ||
multimodal: true, | ||
experimental: false, | ||
reasoning: true, | ||
displayName: "Gemini 2.5 Flash Preview", | ||
}, | ||
"gemini-2.5-pro-preview-03-25": { | ||
|
@@ -1926,6 +1929,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
output_cost_per_mil_tokens: 10, | ||
multimodal: true, | ||
experimental: false, | ||
reasoning: true, | ||
displayName: "Gemini 2.5 Pro Preview", | ||
}, | ||
"gemini-2.5-pro-exp-03-25": { | ||
|
@@ -1935,6 +1939,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
output_cost_per_mil_tokens: 0, | ||
multimodal: true, | ||
experimental: true, | ||
reasoning: true, | ||
displayName: "Gemini 2.5 Pro Experimental", | ||
}, | ||
"gemini-2.0-flash-exp": { | ||
|
@@ -2523,6 +2528,8 @@ export const AvailableModels: { [name: string]: ModelSpec } = { | |
displayName: "Command Light", | ||
}, | ||
|
||
// TODO: add anthropic 3.7 converse | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. in this PR? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. going to have to do it as a follow-up |
||
|
||
// VERTEX MODELS | ||
"publishers/google/models/gemini-2.0-flash": { | ||
format: "google", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
export * from "./util"; | ||
export * from "./proxy"; | ||
export * from "./metrics"; | ||
export * from "./types"; |
Uh oh!
There was an error while loading. Please reload this page.