|
1 | 1 | import { describe, expect, test } from "vitest";
|
2 |
| -import { parseFilenameFromUrl } from "./util"; |
| 2 | +import { parseFileMetadataFromUrl } from "./util"; |
3 | 3 |
|
4 |
| -describe("parseFilenameFromUrl", () => { |
| 4 | +describe("parseFileMetadataFromUrl", () => { |
5 | 5 | test("handles basic URLs", () => {
|
6 |
| - expect(parseFilenameFromUrl("https://example.com/file.pdf")).toBe( |
7 |
| - "file.pdf", |
8 |
| - ); |
9 |
| - expect(parseFilenameFromUrl("http://foo.com/bar/example.pdf")).toBe( |
10 |
| - "example.pdf", |
11 |
| - ); |
12 |
| - expect(parseFilenameFromUrl("gs://bucket/file.pdf")).toBe("file.pdf"); |
| 6 | + expect(parseFileMetadataFromUrl("https://example.com/file.pdf")).toEqual({ |
| 7 | + filename: "file.pdf", |
| 8 | + url: expect.any(URL), |
| 9 | + }); |
| 10 | + expect(parseFileMetadataFromUrl("http://foo.com/bar/example.pdf")).toEqual({ |
| 11 | + filename: "example.pdf", |
| 12 | + url: expect.any(URL), |
| 13 | + }); |
13 | 14 | });
|
14 | 15 |
|
15 | 16 | test("handles URLs with query parameters", () => {
|
16 | 17 | expect(
|
17 |
| - parseFilenameFromUrl("https://example.com/file.pdf?query=value"), |
18 |
| - ).toBe("file.pdf"); |
19 |
| - expect(parseFilenameFromUrl("http://foo.com/doc.pdf?v=1&id=123")).toBe( |
20 |
| - "doc.pdf", |
21 |
| - ); |
| 18 | + parseFileMetadataFromUrl("https://example.com/file.pdf?query=value"), |
| 19 | + ).toEqual({ filename: "file.pdf", url: expect.any(URL) }); |
22 | 20 | expect(
|
23 |
| - parseFilenameFromUrl("https://site.com/download.pdf?token=abc123"), |
24 |
| - ).toBe("download.pdf"); |
| 21 | + parseFileMetadataFromUrl("http://foo.com/doc.pdf?v=1&id=123"), |
| 22 | + ).toEqual({ filename: "doc.pdf", url: expect.any(URL) }); |
25 | 23 | expect(
|
26 |
| - parseFilenameFromUrl( |
| 24 | + parseFileMetadataFromUrl("https://site.com/download.pdf?token=abc123"), |
| 25 | + ).toEqual({ filename: "download.pdf", url: expect.any(URL) }); |
| 26 | + expect( |
| 27 | + parseFileMetadataFromUrl( |
27 | 28 | "http://example.com/report.pdf?token=example%20with%20spaces",
|
28 | 29 | ),
|
29 |
| - ).toBe("report.pdf"); |
| 30 | + ).toEqual({ filename: "report.pdf", url: expect.any(URL) }); |
30 | 31 | });
|
31 | 32 |
|
32 | 33 | test("handles filenames with spaces and special characters", () => {
|
33 |
| - expect(parseFilenameFromUrl("https://example.com/my%20file.pdf")).toBe( |
34 |
| - "my file.pdf", |
35 |
| - ); |
36 |
| - expect(parseFilenameFromUrl("http://foo.com/report-2023.pdf")).toBe( |
37 |
| - "report-2023.pdf", |
38 |
| - ); |
39 |
| - expect(parseFilenameFromUrl("https://site.com/exa%20mple.pdf")).toBe( |
40 |
| - "exa mple.pdf", |
| 34 | + expect( |
| 35 | + parseFileMetadataFromUrl("https://example.com/my%20file.pdf"), |
| 36 | + ).toEqual({ filename: "my file.pdf", url: expect.any(URL) }); |
| 37 | + expect(parseFileMetadataFromUrl("http://foo.com/report-2023.pdf")).toEqual({ |
| 38 | + filename: "report-2023.pdf", |
| 39 | + url: expect.any(URL), |
| 40 | + }); |
| 41 | + expect(parseFileMetadataFromUrl("https://site.com/exa%20mple.pdf")).toEqual( |
| 42 | + { filename: "exa mple.pdf", url: expect.any(URL) }, |
41 | 43 | );
|
42 | 44 | expect(
|
43 |
| - parseFilenameFromUrl("http://example.com/file%20with%20spaces.pdf"), |
44 |
| - ).toBe("file with spaces.pdf"); |
| 45 | + parseFileMetadataFromUrl("http://example.com/file%20with%20spaces.pdf"), |
| 46 | + ).toEqual({ filename: "file with spaces.pdf", url: expect.any(URL) }); |
45 | 47 | expect(
|
46 |
| - parseFilenameFromUrl( |
| 48 | + parseFileMetadataFromUrl( |
47 | 49 | "https://example.com/file-name_with.special-chars.pdf",
|
48 | 50 | ),
|
49 |
| - ).toBe("file-name_with.special-chars.pdf"); |
| 51 | + ).toEqual({ |
| 52 | + filename: "file-name_with.special-chars.pdf", |
| 53 | + url: expect.any(URL), |
| 54 | + }); |
50 | 55 | expect(
|
51 |
| - parseFilenameFromUrl("http://site.org/file%25with%25percent.pdf"), |
52 |
| - ).toBe("file%with%percent.pdf"); |
53 |
| - expect(parseFilenameFromUrl("https://example.com/file+with+plus.pdf")).toBe( |
54 |
| - "file+with+plus.pdf", |
55 |
| - ); |
| 56 | + parseFileMetadataFromUrl("http://site.org/file%25with%25percent.pdf"), |
| 57 | + ).toEqual({ filename: "file%with%percent.pdf", url: expect.any(URL) }); |
| 58 | + expect( |
| 59 | + parseFileMetadataFromUrl("https://example.com/file+with+plus.pdf"), |
| 60 | + ).toEqual({ filename: "file+with+plus.pdf", url: expect.any(URL) }); |
56 | 61 | });
|
57 | 62 |
|
58 | 63 | test("handles pathless URLs", () => {
|
59 |
| - expect(parseFilenameFromUrl("https://example.pdf")).toBe("example.pdf"); |
60 |
| - expect(parseFilenameFromUrl("file.pdf")).toBe("file.pdf"); |
61 |
| - expect(parseFilenameFromUrl("folder/file.pdf")).toBe("file.pdf"); |
| 64 | + expect(parseFileMetadataFromUrl("https://example.pdf")).toBeUndefined(); |
| 65 | + expect(parseFileMetadataFromUrl("file.pdf")).toBeUndefined(); |
| 66 | + expect(parseFileMetadataFromUrl("folder/file.pdf")).toBeUndefined(); |
62 | 67 | });
|
63 | 68 |
|
64 | 69 | test("handles URLs with fragments", () => {
|
65 | 70 | expect(
|
66 |
| - parseFilenameFromUrl("https://example.com/document.pdf#page=1"), |
67 |
| - ).toBe("document.pdf"); |
68 |
| - expect(parseFilenameFromUrl("http://site.com/resume.pdf#section")).toBe( |
69 |
| - "resume.pdf", |
70 |
| - ); |
| 71 | + parseFileMetadataFromUrl("https://example.com/document.pdf#page=1"), |
| 72 | + ).toEqual({ filename: "document.pdf", url: expect.any(URL) }); |
71 | 73 | expect(
|
72 |
| - parseFilenameFromUrl("https://example.com/file.pdf#fragment=with=equals"), |
73 |
| - ).toBe("file.pdf"); |
| 74 | + parseFileMetadataFromUrl("http://site.com/resume.pdf#section"), |
| 75 | + ).toEqual({ filename: "resume.pdf", url: expect.any(URL) }); |
| 76 | + expect( |
| 77 | + parseFileMetadataFromUrl( |
| 78 | + "https://example.com/file.pdf#fragment=with=equals", |
| 79 | + ), |
| 80 | + ).toEqual({ filename: "file.pdf", url: expect.any(URL) }); |
74 | 81 | });
|
75 | 82 |
|
76 | 83 | test("handles URLs with both query parameters and fragments", () => {
|
77 | 84 | expect(
|
78 |
| - parseFilenameFromUrl("https://example.com/report.pdf?version=2#page=5"), |
79 |
| - ).toBe("report.pdf"); |
| 85 | + parseFileMetadataFromUrl( |
| 86 | + "https://example.com/report.pdf?version=2#page=5", |
| 87 | + ), |
| 88 | + ).toEqual({ filename: "report.pdf", url: expect.any(URL) }); |
80 | 89 | expect(
|
81 |
| - parseFilenameFromUrl( |
| 90 | + parseFileMetadataFromUrl( |
82 | 91 | "http://site.org/document.pdf?dl=true#section=summary",
|
83 | 92 | ),
|
84 |
| - ).toBe("document.pdf"); |
| 93 | + ).toEqual({ filename: "document.pdf", url: expect.any(URL) }); |
85 | 94 | expect(
|
86 |
| - parseFilenameFromUrl("https://example.com/file.pdf?a=1&b=2#c=3&d=4"), |
87 |
| - ).toBe("file.pdf"); |
| 95 | + parseFileMetadataFromUrl("https://example.com/file.pdf?a=1&b=2#c=3&d=4"), |
| 96 | + ).toEqual({ filename: "file.pdf", url: expect.any(URL) }); |
88 | 97 | });
|
89 | 98 |
|
90 |
| - test("handles non-standard URL formats", () => { |
| 99 | + test("returns undefined for URLs with uninferrable file names", () => { |
91 | 100 | expect(
|
92 |
| - parseFilenameFromUrl("http://foo.com/bar/?file=example.pdf"), |
| 101 | + parseFileMetadataFromUrl("http://foo.com/bar/?file=example.pdf"), |
93 | 102 | ).toBeUndefined();
|
94 |
| - expect(parseFilenameFromUrl("ftp://files.org/documents/sample.pdf")).toBe( |
95 |
| - "sample.pdf", |
96 |
| - ); |
97 |
| - expect(parseFilenameFromUrl("s3://my-bucket/backup/archive.pdf")).toBe( |
98 |
| - "archive.pdf", |
99 |
| - ); |
| 103 | + expect(parseFileMetadataFromUrl("http://foo.com/bar/")).toBeUndefined(); |
| 104 | + expect(parseFileMetadataFromUrl("http://foo.com")).toBeUndefined(); |
| 105 | + }); |
| 106 | + |
| 107 | + test("returns undefined for non-standard URL formats", () => { |
100 | 108 | expect(
|
101 |
| - parseFilenameFromUrl("file:///C:/Users/name/Documents/file.pdf"), |
102 |
| - ).toBe("file.pdf"); |
| 109 | + parseFileMetadataFromUrl("http://foo.com/bar/?file=example.pdf"), |
| 110 | + ).toBeUndefined(); |
| 111 | + expect(parseFileMetadataFromUrl("gs://bucket/file.pdf")).toBeUndefined(); |
| 112 | + expect( |
| 113 | + parseFileMetadataFromUrl("ftp://files.org/documents/sample.pdf"), |
| 114 | + ).toBeUndefined(); |
103 | 115 | expect(
|
104 |
| - parseFilenameFromUrl( |
| 116 | + parseFileMetadataFromUrl("s3://my-bucket/backup/archive.pdf"), |
| 117 | + ).toBeUndefined(); |
| 118 | + expect( |
| 119 | + parseFileMetadataFromUrl("file:///C:/Users/name/Documents/file.pdf"), |
| 120 | + ).toBeUndefined(); |
| 121 | + expect( |
| 122 | + parseFileMetadataFromUrl( |
105 | 123 | "sftp://username:password@example.com/path/to/file.pdf",
|
106 | 124 | ),
|
107 |
| - ).toBe("file.pdf"); |
| 125 | + ).toBeUndefined(); |
108 | 126 | });
|
109 | 127 |
|
110 | 128 | test("returns undefined for URLs without filename", () => {
|
111 |
| - expect(parseFilenameFromUrl("https://example.com/")).toBeUndefined(); |
112 |
| - expect(parseFilenameFromUrl("http://site.org")).toBeUndefined(); |
113 |
| - expect(parseFilenameFromUrl("")).toBeUndefined(); |
114 |
| - expect(parseFilenameFromUrl(" ")).toBeUndefined(); |
115 |
| - expect(parseFilenameFromUrl(null as unknown as string)).toBeUndefined(); |
| 129 | + expect(parseFileMetadataFromUrl("https://example.com/")).toBeUndefined(); |
| 130 | + expect(parseFileMetadataFromUrl("http://site.org")).toBeUndefined(); |
| 131 | + expect(parseFileMetadataFromUrl("")).toBeUndefined(); |
| 132 | + expect(parseFileMetadataFromUrl(" ")).toBeUndefined(); |
| 133 | + expect(parseFileMetadataFromUrl(null as unknown as string)).toBeUndefined(); |
116 | 134 | expect(
|
117 |
| - parseFilenameFromUrl(undefined as unknown as string), |
| 135 | + parseFileMetadataFromUrl(undefined as unknown as string), |
118 | 136 | ).toBeUndefined();
|
119 | 137 | });
|
120 | 138 |
|
121 | 139 | test("handles different file extensions", () => {
|
122 |
| - expect(parseFilenameFromUrl("https://example.com/document.docx")).toBe( |
123 |
| - "document.docx", |
124 |
| - ); |
125 |
| - expect(parseFilenameFromUrl("https://example.com/spreadsheet.xlsx")).toBe( |
126 |
| - "spreadsheet.xlsx", |
127 |
| - ); |
128 |
| - expect(parseFilenameFromUrl("https://example.com/presentation.pptx")).toBe( |
129 |
| - "presentation.pptx", |
130 |
| - ); |
131 |
| - expect(parseFilenameFromUrl("https://example.com/archive.zip")).toBe( |
132 |
| - "archive.zip", |
133 |
| - ); |
134 |
| - expect(parseFilenameFromUrl("https://example.com/image.jpg")).toBe( |
135 |
| - "image.jpg", |
136 |
| - ); |
137 |
| - expect(parseFilenameFromUrl("https://example.com/video.mp4")).toBe( |
138 |
| - "video.mp4", |
139 |
| - ); |
140 |
| - expect(parseFilenameFromUrl("https://example.com/data.json")).toBe( |
141 |
| - "data.json", |
142 |
| - ); |
143 |
| - expect(parseFilenameFromUrl("https://example.com/page.html")).toBe( |
144 |
| - "page.html", |
145 |
| - ); |
| 140 | + expect( |
| 141 | + parseFileMetadataFromUrl("https://example.com/document.docx"), |
| 142 | + ).toEqual({ filename: "document.docx", url: expect.any(URL) }); |
| 143 | + expect( |
| 144 | + parseFileMetadataFromUrl("https://example.com/spreadsheet.xlsx"), |
| 145 | + ).toEqual({ filename: "spreadsheet.xlsx", url: expect.any(URL) }); |
| 146 | + expect( |
| 147 | + parseFileMetadataFromUrl("https://example.com/presentation.pptx"), |
| 148 | + ).toEqual({ filename: "presentation.pptx", url: expect.any(URL) }); |
| 149 | + expect(parseFileMetadataFromUrl("https://example.com/archive.zip")).toEqual( |
| 150 | + { filename: "archive.zip", url: expect.any(URL) }, |
| 151 | + ); |
| 152 | + expect(parseFileMetadataFromUrl("https://example.com/image.jpg")).toEqual({ |
| 153 | + filename: "image.jpg", |
| 154 | + url: expect.any(URL), |
| 155 | + }); |
| 156 | + expect(parseFileMetadataFromUrl("https://example.com/video.mp4")).toEqual({ |
| 157 | + filename: "video.mp4", |
| 158 | + url: expect.any(URL), |
| 159 | + }); |
| 160 | + expect(parseFileMetadataFromUrl("https://example.com/data.json")).toEqual({ |
| 161 | + filename: "data.json", |
| 162 | + url: expect.any(URL), |
| 163 | + }); |
| 164 | + expect(parseFileMetadataFromUrl("https://example.com/page.html")).toEqual({ |
| 165 | + filename: "page.html", |
| 166 | + url: expect.any(URL), |
| 167 | + }); |
146 | 168 | });
|
147 | 169 |
|
148 | 170 | test("handles complex URL encodings", () => {
|
149 | 171 | expect(
|
150 |
| - parseFilenameFromUrl( |
| 172 | + parseFileMetadataFromUrl( |
151 | 173 | "https://example.com/file%20with%20spaces%20and%20%23%20symbols.pdf",
|
152 | 174 | ),
|
153 |
| - ).toBe("file with spaces and # symbols.pdf"); |
| 175 | + ).toEqual({ |
| 176 | + filename: "file with spaces and # symbols.pdf", |
| 177 | + url: expect.any(URL), |
| 178 | + }); |
154 | 179 | expect(
|
155 |
| - parseFilenameFromUrl("https://example.com/%E6%96%87%E4%BB%B6.pdf"), |
156 |
| - ).toBe("文件.pdf"); |
| 180 | + parseFileMetadataFromUrl("https://example.com/%E6%96%87%E4%BB%B6.pdf"), |
| 181 | + ).toEqual({ filename: "文件.pdf", url: expect.any(URL) }); |
157 | 182 | expect(
|
158 |
| - parseFilenameFromUrl("https://example.com/r%C3%A9sum%C3%A9.pdf"), |
159 |
| - ).toBe("résumé.pdf"); |
| 183 | + parseFileMetadataFromUrl("https://example.com/r%C3%A9sum%C3%A9.pdf"), |
| 184 | + ).toEqual({ filename: "résumé.pdf", url: expect.any(URL) }); |
160 | 185 | expect(
|
161 |
| - parseFilenameFromUrl("https://example.com/file%2Bwith%2Bplus.pdf"), |
162 |
| - ).toBe("file+with+plus.pdf"); |
| 186 | + parseFileMetadataFromUrl("https://example.com/file%2Bwith%2Bplus.pdf"), |
| 187 | + ).toEqual({ filename: "file+with+plus.pdf", url: expect.any(URL) }); |
163 | 188 | expect(
|
164 |
| - parseFilenameFromUrl("https://example.com/file%3Fwith%3Fquestion.pdf"), |
165 |
| - ).toBe("file?with?question.pdf"); |
| 189 | + parseFileMetadataFromUrl( |
| 190 | + "https://example.com/file%3Fwith%3Fquestion.pdf", |
| 191 | + ), |
| 192 | + ).toEqual({ filename: "file?with?question.pdf", url: expect.any(URL) }); |
| 193 | + }); |
| 194 | + |
| 195 | + test("handles S3 pre-signed URLs", () => { |
| 196 | + expect( |
| 197 | + parseFileMetadataFromUrl( |
| 198 | + "https://somes3subdomain.s3.amazonaws.com/files/e1ebccc2-4006-434e-a739-cba3b3fd85dd?X-Amz-Expires=86400&response-content-disposition=attachment%3B%20filename%3D%22test.pdf%22&response-content-type=application%2Fpdf&x-id=GetObject", |
| 199 | + ), |
| 200 | + ).toEqual({ |
| 201 | + filename: "test.pdf", |
| 202 | + contentType: "application/pdf", |
| 203 | + url: expect.any(URL), |
| 204 | + }); |
166 | 205 | });
|
167 | 206 | });
|
0 commit comments