Skip to content

Commit 5cab7c7

Browse files
Properly base64-encode PDF attachments for OpenAI (#225)
Also add descriptions for custom models
1 parent 1d8a64a commit 5cab7c7

File tree

6 files changed

+350
-168
lines changed

6 files changed

+350
-168
lines changed

packages/proxy/package.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@
6767
"express"
6868
],
6969
"devDependencies": {
70+
"@types/content-disposition": "^0.5.8",
7071
"@types/jsonwebtoken": "^9.0.7",
7172
"@types/node": "^20.10.5",
7273
"@types/uuid": "^9.0.7",
@@ -93,6 +94,7 @@
9394
"@opentelemetry/sdk-metrics": "^1.19.0",
9495
"ai": "2.2.37",
9596
"cache-control-parser": "^2.0.6",
97+
"content-disposition": "^0.5.4",
9698
"date-fns": "^4.1.0",
9799
"eventsource-parser": "^1.1.1",
98100
"jose": "^5.9.6",

packages/proxy/schema/models.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ export const ModelSchema = z.object({
5252
parent: z.string().nullish(),
5353
endpoint_types: z.array(z.enum(ModelEndpointType)).nullish(),
5454
locations: z.array(z.string()).nullish(),
55+
description: z.string().nullish(),
5556
});
5657

5758
export type ModelSpec = z.infer<typeof ModelSchema>;

packages/proxy/src/providers/openai.ts

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,9 @@ import {
33
ChatCompletion,
44
ChatCompletionMessageParam,
55
ChatCompletionContentPart,
6-
ChatCompletionContentPartRefusal,
76
} from "openai/resources";
87
import { base64ToUrl, convertBase64Media, convertMediaToBase64 } from "./util";
9-
import { parseFilenameFromUrl } from "..";
8+
import { parseFileMetadataFromUrl } from "../util";
109

1110
function openAIChatCompletionToChatEvent(
1211
completion: ChatCompletion,
@@ -108,7 +107,13 @@ async function normalizeOpenAIContent(
108107
case "image_url":
109108
if (convertBase64Media(content.image_url.url)) {
110109
return content;
111-
} else if (content.image_url.url.endsWith(".pdf")) {
110+
}
111+
112+
const parsed = parseFileMetadataFromUrl(content.image_url.url);
113+
if (
114+
parsed?.filename?.endsWith(".pdf") ||
115+
parsed?.contentType === "application/pdf"
116+
) {
112117
const base64 = await convertMediaToBase64({
113118
media: content.image_url.url,
114119
allowedMediaTypes: ["application/pdf"],
@@ -117,8 +122,7 @@ async function normalizeOpenAIContent(
117122
return {
118123
type: "file",
119124
file: {
120-
filename:
121-
parseFilenameFromUrl(content.image_url.url) ?? "image.pdf",
125+
filename: parsed.filename,
122126
file_data: base64ToUrl(base64),
123127
},
124128
};

packages/proxy/src/util.test.ts

Lines changed: 142 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -1,167 +1,206 @@
11
import { describe, expect, test } from "vitest";
2-
import { parseFilenameFromUrl } from "./util";
2+
import { parseFileMetadataFromUrl } from "./util";
33

4-
describe("parseFilenameFromUrl", () => {
4+
describe("parseFileMetadataFromUrl", () => {
55
test("handles basic URLs", () => {
6-
expect(parseFilenameFromUrl("https://example.com/file.pdf")).toBe(
7-
"file.pdf",
8-
);
9-
expect(parseFilenameFromUrl("http://foo.com/bar/example.pdf")).toBe(
10-
"example.pdf",
11-
);
12-
expect(parseFilenameFromUrl("gs://bucket/file.pdf")).toBe("file.pdf");
6+
expect(parseFileMetadataFromUrl("https://example.com/file.pdf")).toEqual({
7+
filename: "file.pdf",
8+
url: expect.any(URL),
9+
});
10+
expect(parseFileMetadataFromUrl("http://foo.com/bar/example.pdf")).toEqual({
11+
filename: "example.pdf",
12+
url: expect.any(URL),
13+
});
1314
});
1415

1516
test("handles URLs with query parameters", () => {
1617
expect(
17-
parseFilenameFromUrl("https://example.com/file.pdf?query=value"),
18-
).toBe("file.pdf");
19-
expect(parseFilenameFromUrl("http://foo.com/doc.pdf?v=1&id=123")).toBe(
20-
"doc.pdf",
21-
);
18+
parseFileMetadataFromUrl("https://example.com/file.pdf?query=value"),
19+
).toEqual({ filename: "file.pdf", url: expect.any(URL) });
2220
expect(
23-
parseFilenameFromUrl("https://site.com/download.pdf?token=abc123"),
24-
).toBe("download.pdf");
21+
parseFileMetadataFromUrl("http://foo.com/doc.pdf?v=1&id=123"),
22+
).toEqual({ filename: "doc.pdf", url: expect.any(URL) });
2523
expect(
26-
parseFilenameFromUrl(
24+
parseFileMetadataFromUrl("https://site.com/download.pdf?token=abc123"),
25+
).toEqual({ filename: "download.pdf", url: expect.any(URL) });
26+
expect(
27+
parseFileMetadataFromUrl(
2728
"http://example.com/report.pdf?token=example%20with%20spaces",
2829
),
29-
).toBe("report.pdf");
30+
).toEqual({ filename: "report.pdf", url: expect.any(URL) });
3031
});
3132

3233
test("handles filenames with spaces and special characters", () => {
33-
expect(parseFilenameFromUrl("https://example.com/my%20file.pdf")).toBe(
34-
"my file.pdf",
35-
);
36-
expect(parseFilenameFromUrl("http://foo.com/report-2023.pdf")).toBe(
37-
"report-2023.pdf",
38-
);
39-
expect(parseFilenameFromUrl("https://site.com/exa%20mple.pdf")).toBe(
40-
"exa mple.pdf",
34+
expect(
35+
parseFileMetadataFromUrl("https://example.com/my%20file.pdf"),
36+
).toEqual({ filename: "my file.pdf", url: expect.any(URL) });
37+
expect(parseFileMetadataFromUrl("http://foo.com/report-2023.pdf")).toEqual({
38+
filename: "report-2023.pdf",
39+
url: expect.any(URL),
40+
});
41+
expect(parseFileMetadataFromUrl("https://site.com/exa%20mple.pdf")).toEqual(
42+
{ filename: "exa mple.pdf", url: expect.any(URL) },
4143
);
4244
expect(
43-
parseFilenameFromUrl("http://example.com/file%20with%20spaces.pdf"),
44-
).toBe("file with spaces.pdf");
45+
parseFileMetadataFromUrl("http://example.com/file%20with%20spaces.pdf"),
46+
).toEqual({ filename: "file with spaces.pdf", url: expect.any(URL) });
4547
expect(
46-
parseFilenameFromUrl(
48+
parseFileMetadataFromUrl(
4749
"https://example.com/file-name_with.special-chars.pdf",
4850
),
49-
).toBe("file-name_with.special-chars.pdf");
51+
).toEqual({
52+
filename: "file-name_with.special-chars.pdf",
53+
url: expect.any(URL),
54+
});
5055
expect(
51-
parseFilenameFromUrl("http://site.org/file%25with%25percent.pdf"),
52-
).toBe("file%with%percent.pdf");
53-
expect(parseFilenameFromUrl("https://example.com/file+with+plus.pdf")).toBe(
54-
"file+with+plus.pdf",
55-
);
56+
parseFileMetadataFromUrl("http://site.org/file%25with%25percent.pdf"),
57+
).toEqual({ filename: "file%with%percent.pdf", url: expect.any(URL) });
58+
expect(
59+
parseFileMetadataFromUrl("https://example.com/file+with+plus.pdf"),
60+
).toEqual({ filename: "file+with+plus.pdf", url: expect.any(URL) });
5661
});
5762

5863
test("handles pathless URLs", () => {
59-
expect(parseFilenameFromUrl("https://example.pdf")).toBe("example.pdf");
60-
expect(parseFilenameFromUrl("file.pdf")).toBe("file.pdf");
61-
expect(parseFilenameFromUrl("folder/file.pdf")).toBe("file.pdf");
64+
expect(parseFileMetadataFromUrl("https://example.pdf")).toBeUndefined();
65+
expect(parseFileMetadataFromUrl("file.pdf")).toBeUndefined();
66+
expect(parseFileMetadataFromUrl("folder/file.pdf")).toBeUndefined();
6267
});
6368

6469
test("handles URLs with fragments", () => {
6570
expect(
66-
parseFilenameFromUrl("https://example.com/document.pdf#page=1"),
67-
).toBe("document.pdf");
68-
expect(parseFilenameFromUrl("http://site.com/resume.pdf#section")).toBe(
69-
"resume.pdf",
70-
);
71+
parseFileMetadataFromUrl("https://example.com/document.pdf#page=1"),
72+
).toEqual({ filename: "document.pdf", url: expect.any(URL) });
7173
expect(
72-
parseFilenameFromUrl("https://example.com/file.pdf#fragment=with=equals"),
73-
).toBe("file.pdf");
74+
parseFileMetadataFromUrl("http://site.com/resume.pdf#section"),
75+
).toEqual({ filename: "resume.pdf", url: expect.any(URL) });
76+
expect(
77+
parseFileMetadataFromUrl(
78+
"https://example.com/file.pdf#fragment=with=equals",
79+
),
80+
).toEqual({ filename: "file.pdf", url: expect.any(URL) });
7481
});
7582

7683
test("handles URLs with both query parameters and fragments", () => {
7784
expect(
78-
parseFilenameFromUrl("https://example.com/report.pdf?version=2#page=5"),
79-
).toBe("report.pdf");
85+
parseFileMetadataFromUrl(
86+
"https://example.com/report.pdf?version=2#page=5",
87+
),
88+
).toEqual({ filename: "report.pdf", url: expect.any(URL) });
8089
expect(
81-
parseFilenameFromUrl(
90+
parseFileMetadataFromUrl(
8291
"http://site.org/document.pdf?dl=true#section=summary",
8392
),
84-
).toBe("document.pdf");
93+
).toEqual({ filename: "document.pdf", url: expect.any(URL) });
8594
expect(
86-
parseFilenameFromUrl("https://example.com/file.pdf?a=1&b=2#c=3&d=4"),
87-
).toBe("file.pdf");
95+
parseFileMetadataFromUrl("https://example.com/file.pdf?a=1&b=2#c=3&d=4"),
96+
).toEqual({ filename: "file.pdf", url: expect.any(URL) });
8897
});
8998

90-
test("handles non-standard URL formats", () => {
99+
test("returns undefined for URLs with uninferrable file names", () => {
91100
expect(
92-
parseFilenameFromUrl("http://foo.com/bar/?file=example.pdf"),
101+
parseFileMetadataFromUrl("http://foo.com/bar/?file=example.pdf"),
93102
).toBeUndefined();
94-
expect(parseFilenameFromUrl("ftp://files.org/documents/sample.pdf")).toBe(
95-
"sample.pdf",
96-
);
97-
expect(parseFilenameFromUrl("s3://my-bucket/backup/archive.pdf")).toBe(
98-
"archive.pdf",
99-
);
103+
expect(parseFileMetadataFromUrl("http://foo.com/bar/")).toBeUndefined();
104+
expect(parseFileMetadataFromUrl("http://foo.com")).toBeUndefined();
105+
});
106+
107+
test("returns undefined for non-standard URL formats", () => {
100108
expect(
101-
parseFilenameFromUrl("file:///C:/Users/name/Documents/file.pdf"),
102-
).toBe("file.pdf");
109+
parseFileMetadataFromUrl("http://foo.com/bar/?file=example.pdf"),
110+
).toBeUndefined();
111+
expect(parseFileMetadataFromUrl("gs://bucket/file.pdf")).toBeUndefined();
112+
expect(
113+
parseFileMetadataFromUrl("ftp://files.org/documents/sample.pdf"),
114+
).toBeUndefined();
103115
expect(
104-
parseFilenameFromUrl(
116+
parseFileMetadataFromUrl("s3://my-bucket/backup/archive.pdf"),
117+
).toBeUndefined();
118+
expect(
119+
parseFileMetadataFromUrl("file:///C:/Users/name/Documents/file.pdf"),
120+
).toBeUndefined();
121+
expect(
122+
parseFileMetadataFromUrl(
105123
"sftp://username:password@example.com/path/to/file.pdf",
106124
),
107-
).toBe("file.pdf");
125+
).toBeUndefined();
108126
});
109127

110128
test("returns undefined for URLs without filename", () => {
111-
expect(parseFilenameFromUrl("https://example.com/")).toBeUndefined();
112-
expect(parseFilenameFromUrl("http://site.org")).toBeUndefined();
113-
expect(parseFilenameFromUrl("")).toBeUndefined();
114-
expect(parseFilenameFromUrl(" ")).toBeUndefined();
115-
expect(parseFilenameFromUrl(null as unknown as string)).toBeUndefined();
129+
expect(parseFileMetadataFromUrl("https://example.com/")).toBeUndefined();
130+
expect(parseFileMetadataFromUrl("http://site.org")).toBeUndefined();
131+
expect(parseFileMetadataFromUrl("")).toBeUndefined();
132+
expect(parseFileMetadataFromUrl(" ")).toBeUndefined();
133+
expect(parseFileMetadataFromUrl(null as unknown as string)).toBeUndefined();
116134
expect(
117-
parseFilenameFromUrl(undefined as unknown as string),
135+
parseFileMetadataFromUrl(undefined as unknown as string),
118136
).toBeUndefined();
119137
});
120138

121139
test("handles different file extensions", () => {
122-
expect(parseFilenameFromUrl("https://example.com/document.docx")).toBe(
123-
"document.docx",
124-
);
125-
expect(parseFilenameFromUrl("https://example.com/spreadsheet.xlsx")).toBe(
126-
"spreadsheet.xlsx",
127-
);
128-
expect(parseFilenameFromUrl("https://example.com/presentation.pptx")).toBe(
129-
"presentation.pptx",
130-
);
131-
expect(parseFilenameFromUrl("https://example.com/archive.zip")).toBe(
132-
"archive.zip",
133-
);
134-
expect(parseFilenameFromUrl("https://example.com/image.jpg")).toBe(
135-
"image.jpg",
136-
);
137-
expect(parseFilenameFromUrl("https://example.com/video.mp4")).toBe(
138-
"video.mp4",
139-
);
140-
expect(parseFilenameFromUrl("https://example.com/data.json")).toBe(
141-
"data.json",
142-
);
143-
expect(parseFilenameFromUrl("https://example.com/page.html")).toBe(
144-
"page.html",
145-
);
140+
expect(
141+
parseFileMetadataFromUrl("https://example.com/document.docx"),
142+
).toEqual({ filename: "document.docx", url: expect.any(URL) });
143+
expect(
144+
parseFileMetadataFromUrl("https://example.com/spreadsheet.xlsx"),
145+
).toEqual({ filename: "spreadsheet.xlsx", url: expect.any(URL) });
146+
expect(
147+
parseFileMetadataFromUrl("https://example.com/presentation.pptx"),
148+
).toEqual({ filename: "presentation.pptx", url: expect.any(URL) });
149+
expect(parseFileMetadataFromUrl("https://example.com/archive.zip")).toEqual(
150+
{ filename: "archive.zip", url: expect.any(URL) },
151+
);
152+
expect(parseFileMetadataFromUrl("https://example.com/image.jpg")).toEqual({
153+
filename: "image.jpg",
154+
url: expect.any(URL),
155+
});
156+
expect(parseFileMetadataFromUrl("https://example.com/video.mp4")).toEqual({
157+
filename: "video.mp4",
158+
url: expect.any(URL),
159+
});
160+
expect(parseFileMetadataFromUrl("https://example.com/data.json")).toEqual({
161+
filename: "data.json",
162+
url: expect.any(URL),
163+
});
164+
expect(parseFileMetadataFromUrl("https://example.com/page.html")).toEqual({
165+
filename: "page.html",
166+
url: expect.any(URL),
167+
});
146168
});
147169

148170
test("handles complex URL encodings", () => {
149171
expect(
150-
parseFilenameFromUrl(
172+
parseFileMetadataFromUrl(
151173
"https://example.com/file%20with%20spaces%20and%20%23%20symbols.pdf",
152174
),
153-
).toBe("file with spaces and # symbols.pdf");
175+
).toEqual({
176+
filename: "file with spaces and # symbols.pdf",
177+
url: expect.any(URL),
178+
});
154179
expect(
155-
parseFilenameFromUrl("https://example.com/%E6%96%87%E4%BB%B6.pdf"),
156-
).toBe("文件.pdf");
180+
parseFileMetadataFromUrl("https://example.com/%E6%96%87%E4%BB%B6.pdf"),
181+
).toEqual({ filename: "文件.pdf", url: expect.any(URL) });
157182
expect(
158-
parseFilenameFromUrl("https://example.com/r%C3%A9sum%C3%A9.pdf"),
159-
).toBe("résumé.pdf");
183+
parseFileMetadataFromUrl("https://example.com/r%C3%A9sum%C3%A9.pdf"),
184+
).toEqual({ filename: "résumé.pdf", url: expect.any(URL) });
160185
expect(
161-
parseFilenameFromUrl("https://example.com/file%2Bwith%2Bplus.pdf"),
162-
).toBe("file+with+plus.pdf");
186+
parseFileMetadataFromUrl("https://example.com/file%2Bwith%2Bplus.pdf"),
187+
).toEqual({ filename: "file+with+plus.pdf", url: expect.any(URL) });
163188
expect(
164-
parseFilenameFromUrl("https://example.com/file%3Fwith%3Fquestion.pdf"),
165-
).toBe("file?with?question.pdf");
189+
parseFileMetadataFromUrl(
190+
"https://example.com/file%3Fwith%3Fquestion.pdf",
191+
),
192+
).toEqual({ filename: "file?with?question.pdf", url: expect.any(URL) });
193+
});
194+
195+
test("handles S3 pre-signed URLs", () => {
196+
expect(
197+
parseFileMetadataFromUrl(
198+
"https://somes3subdomain.s3.amazonaws.com/files/e1ebccc2-4006-434e-a739-cba3b3fd85dd?X-Amz-Expires=86400&response-content-disposition=attachment%3B%20filename%3D%22test.pdf%22&response-content-type=application%2Fpdf&x-id=GetObject",
199+
),
200+
).toEqual({
201+
filename: "test.pdf",
202+
contentType: "application/pdf",
203+
url: expect.any(URL),
204+
});
166205
});
167206
});

0 commit comments

Comments
 (0)