Add Anthropic thinking params and responses support #214


Merged · 69 commits · May 31, 2025

Changes from 16 commits

Commits
f48f5a6
initial refactor from o1_like to reasoning
Apr 25, 2025
dce6a69
rough attempt at translating reasoning_effort (openai) to thinking (c…
Apr 25, 2025
fbc05e8
support anthropic thinking responses
Apr 28, 2025
19cae64
handle reasoning oai-like messages
Apr 29, 2025
c5b50e6
assume a more generalized reasoning object
Apr 29, 2025
342a4ea
allow reasoning_effort to be defaulted
Apr 30, 2025
fdb87e9
add support for gemini
Apr 30, 2025
f85073b
should include the reasoning delta
May 3, 2025
3c7bc81
include the reasoning type to chunk delta
May 3, 2025
bebacfd
include reasoning when we're done with the stream
May 3, 2025
d43314b
temp: use local dir for proxy to appease build errors
May 3, 2025
ea0bacf
small cleanup
May 7, 2025
cbb8ec0
Merge branch 'main' into thinking-params
May 7, 2025
383514b
start local tests for proxyV1 and add coverage for anthropic
May 8, 2025
5250106
one cannot assume the bodyData is a valid json
May 8, 2025
de802e3
add test coverage for reasoning and improve type handling when we ext…
May 8, 2025
3084245
start to move types from core to new types/openai.ts
May 9, 2025
b311dae
add google test coverage and upgrade us away from deprecated library …
May 9, 2025
0914e78
return bedrock & openai (responses) reasoning
May 9, 2025
7d5a0d2
Merge branch 'main' into thinking-params
May 9, 2025
0e4de2d
bump version
May 9, 2025
c1876db
use 0.0.86
May 13, 2025
67a5bdc
get ci/cd working again
May 13, 2025
4a9eb62
increase timeout
May 13, 2025
3beba3b
add test coverage and fix mapping methods
May 13, 2025
d8eae39
add some dev notes
May 13, 2025
7ff7188
reconcile the latest on gemini models & vertex ai
May 15, 2025
33677b1
add support for reasoning_effort: undefined to disable reasoning toke…
May 15, 2025
c6ba935
PR feedback
May 15, 2025
b10694a
add coverage for openai call checking
May 16, 2025
1bd2e52
Merge branch 'main' into thinking-params
May 16, 2025
0e0ebb3
avoid including includeThoughts if disabled
May 16, 2025
c5858dd
fix tests
May 16, 2025
3562ca7
fix disable + no max tokens.. add coverage
May 16, 2025
517ecf0
add type overrides
May 16, 2025
080535d
Merge branch 'main' into thinking-params
May 19, 2025
fdb4625
add reasoning budget to model spec and set defaults
May 20, 2025
bd0b391
let's get reasoning_enabled and reasoning_budget translated
May 20, 2025
7106774
refactor mapping to handle the whole mapping
May 20, 2025
92ac83c
Merge branch 'main' into thinking-params
May 20, 2025
f14807e
Merge branch 'main' into thinking-params
May 20, 2025
fba1dff
remove the bare import
May 21, 2025
7d19720
fix max tokens adjustment for anthropic
May 21, 2025
2f11f2a
make sure to include reasoning tokens if available
May 21, 2025
f68bb95
remove extra debugger
May 21, 2025
45fb63a
update google test now that gemini models return reasoning tokens
May 21, 2025
4f68885
fix tests
May 21, 2025
81c6968
add gemini flash preview 05-20
May 21, 2025
6e661b4
disable vertex test again
May 21, 2025
c3915df
Merge branch 'main' into thinking-params
May 21, 2025
c24f911
Merge branch 'main' into thinking-params
May 22, 2025
0d570b3
Merge branch 'main' into thinking-params
May 22, 2025
0f68f3b
Merge branch 'main' into thinking-params
May 22, 2025
5755308
set the reasoning for claude 4
May 22, 2025
a0277d7
try reverting the ci change
May 23, 2025
94e0a63
Merge branch 'main' into thinking-params
May 23, 2025
ba02e9d
Merge branch 'main' into thinking-params
May 23, 2025
8cd6c41
switch to default export with cacheControl and jsonwebtoken
May 24, 2025
b9ddec6
Merge remote-tracking branch 'origin/thinking-params' into thinking-p…
May 24, 2025
56b38df
add `skott` to assert no circular dependencies issues
May 25, 2025
3e42887
Merge branch 'fix-dependency-issues' into thinking-params
May 26, 2025
7438e82
fix more circ dependencies
May 26, 2025
4981ced
Merge branch 'main' into thinking-params
May 27, 2025
c95c56a
update bedrock/vertex ai opus/sonnet 4 to have reasoning
May 27, 2025
19a2510
Merge remote-tracking branch 'origin/thinking-params' into thinking-p…
May 27, 2025
fcaf564
Merge branch 'main' into thinking-params
May 30, 2025
f9cd4eb
add reasoning enabled and budget for bedrock/vertex claude providers
May 30, 2025
35fd2c0
Merge remote-tracking branch 'origin/thinking-params' into thinking-p…
May 30, 2025
b01da09
add delta override with new reasoning chunk
May 30, 2025
2 changes: 2 additions & 0 deletions .envrc
@@ -0,0 +1,2 @@
source_up_if_exists
dotenv_if_exists
2 changes: 1 addition & 1 deletion packages/proxy/package.json
@@ -82,7 +82,7 @@
"@anthropic-ai/sdk": "^0.39.0",
"@apidevtools/json-schema-ref-parser": "^11.9.1",
"@aws-sdk/client-bedrock-runtime": "^3.738.0",
"@braintrust/core": "^0.0.85",
"@braintrust/core": "link:../../../sdk/core/js",
"@breezystack/lamejs": "^1.2.7",
"@google/generative-ai": "^0.24.0",
"@opentelemetry/api": "^1.7.0",
106 changes: 97 additions & 9 deletions packages/proxy/schema/index.ts
@@ -49,10 +49,79 @@ export const modelParamToModelParam: {
stream_options: null,
parallel_tool_calls: null,
response_format: null,
reasoning_effort: null,
reasoning_effort: "reasoning_effort",
stop: null,
};

const effortToBudgetMultiplier = {
low: 0.2,
medium: 0.5,
high: 0.8,
} as const;

const getBudgetMultiplier = (effort: keyof typeof effortToBudgetMultiplier) => {
return effortToBudgetMultiplier[effort] || effortToBudgetMultiplier.low;
};

export const modelParamMappers: {
[name in ModelFormat]?: {
[param: string]: (params: any) => Record<string, unknown>;
};
} = {
anthropic: {
reasoning_effort: ({
reasoning_effort,
max_tokens,
max_completion_tokens,
temperature: _,
...params
}) => {
// Max tokens are inclusive of budget. If the max tokens are too low (below 1024), then the API will raise an exception.
const maxTokens = Math.max(
max_completion_tokens || max_tokens || 0,
1024 / effortToBudgetMultiplier.low,
);

const budget = getBudgetMultiplier(reasoning_effort || "low") * maxTokens;

return {
...params,
max_tokens: maxTokens,
// must be set when using thinking
temperature: 1,
thinking: {
budget_tokens: budget,
type: "enabled",
},
};
},
},
google: {
reasoning_effort: ({
reasoning_effort,
max_tokens,
max_completion_tokens,
...params
}) => {
const maxTokens = Math.max(
max_completion_tokens || max_tokens || 0,
1024 / effortToBudgetMultiplier.low,
);

const budget = getBudgetMultiplier(reasoning_effort || "low") * maxTokens;

return {
...params,
thinkingConfig: {
thinkingBudget: budget,
includeThoughts: true,
},
maxOutputTokens: maxTokens,
};
},
},
};

export const sliderSpecs: {
// min, max, step, required
[name: string]: [number, number, number, boolean];
@@ -82,13 +151,15 @@ export const defaultModelParamSettings: {
response_format: null,
stop: undefined,
use_cache: true,
reasoning_effort: "medium",
},
anthropic: {
temperature: undefined,
max_tokens: undefined,
top_p: 0.7,
top_k: 5,
use_cache: true,
reasoning_effort: "medium",
},
google: {
temperature: undefined,
@@ -121,6 +192,17 @@ export const modelProviderHasTools: {
converse: true,
};

export const modelProviderHasReasoning: {
[name in ModelFormat]?: RegExp;
} = {
openai: /^o[1-4]/i,
anthropic: /^claude-3\.7/i,
google: /gemini-2.0-flash$|gemini-2.5/i,
js: undefined,
window: undefined,
converse: undefined,
};

export const DefaultEndpointTypes: {
[name in ModelFormat]: ModelEndpointType[];
} = {
@@ -427,23 +509,29 @@ export function translateParams(
toProvider: ModelFormat,
params: Record<string, unknown>,
): Record<string, unknown> {
const translatedParams: Record<string, unknown> = {};
let translatedParams: Record<string, unknown> = {};
for (const [k, v] of Object.entries(params || {})) {
const safeValue = v ?? undefined; // Don't propagate "null" along
const mapper = modelParamMappers[toProvider]?.[k];
if (mapper) {
translatedParams = mapper(translatedParams);
continue;
}

const translatedKey = modelParamToModelParam[k as keyof ModelParams] as
| keyof ModelParams
| undefined
| null;

if (translatedKey === null) {
continue;
} else if (
translatedKey !== undefined &&
defaultModelParamSettings[toProvider][translatedKey] !== undefined
) {
translatedParams[translatedKey] = safeValue;
} else {
translatedParams[k] = safeValue;
}

const hasDefaultParam =
translatedKey !== undefined &&
defaultModelParamSettings[toProvider][translatedKey] !== undefined;

translatedParams[hasDefaultParam ? translatedKey : k] = safeValue;
Contributor: when would we want to set translatedParams[k] to anything if translatedKey is undefined? This is also the case prior to your change, so I'm curious if you found the use case. I'll revisit in #217, so no need to do anything.

Collaborator (author): Yeah, I haven't checked. I honestly think we need to remove this section and instead just do the translate-params mappers that I started: the idea is that you take OpenAI params as input and get Anthropic params as the output. That way we centralize this kind of translation in one spot; right now it's a pseudo-translation, with each provider then deciding how to grab the data from the OpenAI-like params.

Contributor: Yes, but right now this PR is doing something in between. I think we should revert this bit so we don't regress behavior, and figure out the right thing to do in another PR.

}

return translatedParams;
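The reasoning_effort → thinking translation in the hunk above can be checked in isolation. A minimal sketch, reimplemented standalone (the `anthropicThinkingParams` helper name is an assumption for illustration, not part of the PR):

```typescript
// Fractions of max_tokens allotted to the thinking budget, as in the PR.
const effortToBudgetMultiplier = { low: 0.2, medium: 0.5, high: 0.8 } as const;

type Effort = keyof typeof effortToBudgetMultiplier;

// Hypothetical standalone version of the anthropic reasoning_effort mapper:
// max_tokens is inclusive of the budget, and the API rejects budgets below
// 1024 tokens, so max_tokens is floored at 1024 / lowest multiplier.
function anthropicThinkingParams(effort?: Effort, maxTokens?: number) {
  const effectiveMax = Math.max(
    maxTokens ?? 0,
    1024 / effortToBudgetMultiplier.low, // 5120, so 0.2 * max >= 1024
  );
  return {
    max_tokens: effectiveMax,
    temperature: 1, // must be 1 when thinking is enabled
    thinking: {
      type: "enabled" as const,
      budget_tokens: effortToBudgetMultiplier[effort ?? "low"] * effectiveMax,
    },
  };
}
```

With no max_tokens at all, "low" effort floors max_tokens at 5120 and yields the minimum viable 1024-token budget; "high" effort with max_tokens 10000 yields an 8000-token budget.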
37 changes: 22 additions & 15 deletions packages/proxy/schema/models.ts
@@ -44,7 +44,8 @@ export const ModelSchema = z.object({
input_cost_per_mil_tokens: z.number().nullish(),
output_cost_per_mil_tokens: z.number().nullish(),
displayName: z.string().nullish(),
o1_like: z.boolean().nullish(),
o1_like: z.boolean().nullish().describe('DEPRECATED use "reasoning" instead'),
reasoning: z.boolean().nullish(),
experimental: z.boolean().nullish(),
deprecated: z.boolean().nullish(),
parent: z.string().nullish(),
@@ -159,15 +160,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
o1_like: true,
reasoning: true,
},
"o4-mini-2025-04-16": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
o1_like: true,
reasoning: true,
parent: "o4-mini",
},
"o3-mini": {
@@ -176,15 +177,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
o1_like: true,
reasoning: true,
},
"o3-mini-2025-01-31": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 1.1,
output_cost_per_mil_tokens: 4.4,
o1_like: true,
reasoning: true,
parent: "o3-mini",
},
o3: {
@@ -193,15 +194,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: true,
input_cost_per_mil_tokens: 10.0,
output_cost_per_mil_tokens: 40,
o1_like: true,
reasoning: true,
},
"o3-2025-04-16": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 10.0,
output_cost_per_mil_tokens: 40,
o1_like: true,
reasoning: true,
parent: "o3",
},
o1: {
@@ -210,15 +211,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: true,
input_cost_per_mil_tokens: 15.0,
output_cost_per_mil_tokens: 60,
o1_like: true,
reasoning: true,
},
"o1-2024-12-17": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 15.0,
output_cost_per_mil_tokens: 60,
o1_like: true,
reasoning: true,
parent: "o1",
},
"o1-mini": {
@@ -227,15 +228,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: false,
input_cost_per_mil_tokens: 3.0,
output_cost_per_mil_tokens: 12.0,
o1_like: true,
reasoning: true,
},
"o1-mini-2024-09-12": {
format: "openai",
flavor: "chat",
multimodal: false,
input_cost_per_mil_tokens: 3.0,
output_cost_per_mil_tokens: 12.0,
o1_like: true,
reasoning: true,
parent: "o1-mini",
},
"o1-pro": {
@@ -244,15 +245,15 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: true,
input_cost_per_mil_tokens: 150,
output_cost_per_mil_tokens: 600,
o1_like: true,
reasoning: true,
},
"o1-pro-2025-03-19": {
format: "openai",
flavor: "chat",
multimodal: true,
input_cost_per_mil_tokens: 150,
output_cost_per_mil_tokens: 600,
o1_like: true,
reasoning: true,
parent: "o1-pro",
},
"chatgpt-4o-latest": {
@@ -349,8 +350,8 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: false,
input_cost_per_mil_tokens: 15.0,
output_cost_per_mil_tokens: 60,
o1_like: true,
experimental: true,
reasoning: true,
parent: "o1",
},
"o1-preview-2024-09-12": {
@@ -359,8 +360,8 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
multimodal: false,
input_cost_per_mil_tokens: 15.0,
output_cost_per_mil_tokens: 60.0,
o1_like: true,
experimental: true,
reasoning: true,
parent: "o1",
},
"gpt-4o-search-preview": {
@@ -547,6 +548,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
input_cost_per_mil_tokens: 3,
output_cost_per_mil_tokens: 15,
displayName: "Claude 3.7 Sonnet",
reasoning: true,
},
"claude-3-7-sonnet-20250219": {
format: "anthropic",
@@ -1917,6 +1919,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
output_cost_per_mil_tokens: 0.6,
multimodal: true,
experimental: false,
reasoning: true,
displayName: "Gemini 2.5 Flash Preview",
},
"gemini-2.5-pro-preview-03-25": {
@@ -1926,6 +1929,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
output_cost_per_mil_tokens: 10,
multimodal: true,
experimental: false,
reasoning: true,
displayName: "Gemini 2.5 Pro Preview",
},
"gemini-2.5-pro-exp-03-25": {
@@ -1935,6 +1939,7 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
output_cost_per_mil_tokens: 0,
multimodal: true,
experimental: true,
reasoning: true,
displayName: "Gemini 2.5 Pro Experimental",
},
"gemini-2.0-flash-exp": {
@@ -2523,6 +2528,8 @@ export const AvailableModels: { [name: string]: ModelSpec } = {
displayName: "Command Light",
},

// TODO: add anthropic 3.7 converse
Contributor: in this PR?

Collaborator (author): Going to have to do it as a follow-up.


// VERTEX MODELS
"publishers/google/models/gemini-2.0-flash": {
format: "google",
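The per-provider reasoning regexes added in schema/index.ts are only declared in this diff; a sketch of how a lookup might consume them (the `supportsReasoning` helper is hypothetical, not part of the PR):

```typescript
// Regexes from the diff: which model names support reasoning, per format.
const modelProviderHasReasoning: { [format: string]: RegExp | undefined } = {
  openai: /^o[1-4]/i,
  anthropic: /^claude-3\.7/i,
  google: /gemini-2.0-flash$|gemini-2.5/i,
};

// Hypothetical helper: formats with no pattern (js, window, converse)
// fall back to false.
function supportsReasoning(format: string, model: string): boolean {
  return modelProviderHasReasoning[format]?.test(model) ?? false;
}
```

Note the google pattern's `$` anchor: gemini-2.0-flash qualifies, but gemini-2.0-flash-exp does not, while any model name containing gemini-2.5 does.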
1 change: 1 addition & 0 deletions packages/proxy/src/index.ts
@@ -1,3 +1,4 @@
export * from "./util";
export * from "./proxy";
export * from "./metrics";
export * from "./types";