Skip to content

Commit 95cd879

Browse files
feat: semantic cache (#1643)
* setup semantic cache in unkey repo * gitignore * add action deploy * update action * update action * update actions * try alternative wrangler deploy * install * update workflow * rerun * add fe * check in fe * log * update deploy path * only use latest message * regen lockfile * use @unkey/cache * lockfile * update dependencies * lockfile: use versioned cache * remove fe * lockfile * lockfile * update workflow * update workflow * restore pnpm workspace * send data to tinybird correctly * add scripts * fix types * move app to /apps * remove app from packages * Add semantic cache to desktop sidebar * Add semantic cache to mobile sidebar * Add header to semantic caching page * Add basic logging * merge main * formatting fixes * remove action * format code * remove from sidebars * update types * clean up deps * parameterize * fix types * format * Regenerate lockfile * formatting * Refactor into multiple files * remove tsup * remove tsc build step * infer analyticsevent type * respond to feedback * lockfile * add tinybird data * fix(worker.ts): remove unused async middleware function chore(package.json): update license to AGPL-3.0 chore(package.json): remove unnecessary files field chore(package.json): update dev script in semantic-cache package chore(package.json): update dev script in api package chore(package.json): update dev script in hono package chore(package.json): update dev script in nextjs package chore(package.json): update dev script in nuxt package chore(package.json): update dev script in ratelimit package chore(package.json): update dev script in rbac package --------- Co-authored-by: chronark <[email protected]>
1 parent 9d0e767 commit 95cd879

File tree

28 files changed

+1192
-94
lines changed

28 files changed

+1192
-94
lines changed

apps/api/src/worker.ts

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -35,34 +35,6 @@ app.use("*", init());
3535
app.use("*", cors());
3636
app.use("*", metrics());
3737

38-
app.use("*", async (c, next) => {
39-
try {
40-
if (c.env.TINYBIRD_PROXY_URL) {
41-
const start = performance.now();
42-
const p = fetch(new URL("/v0/incr", c.env.TINYBIRD_PROXY_URL), {
43-
method: "POST",
44-
}).then(() => {
45-
const { metrics } = c.get("services");
46-
47-
metrics.emit({
48-
metric: "metric.koyeb.latency",
49-
// @ts-expect-error
50-
continent: c.req.raw?.cf?.continent,
51-
// @ts-expect-error
52-
colo: c.req.raw?.cf?.colo,
53-
latency: performance.now() - start,
54-
});
55-
});
56-
57-
c.executionCtx.waitUntil(p);
58-
}
59-
} catch (e) {
60-
console.error(e);
61-
}
62-
63-
return next();
64-
});
65-
6638
/**
6739
* Registering all route handlers
6840
*/

apps/dashboard/app/(app)/desktop-sidebar.tsx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
BookOpen,
99
Code,
1010
Crown,
11+
DatabaseZap,
1112
GlobeLock,
1213
Loader2,
1314
type LucideIcon,
@@ -113,6 +114,13 @@ export const DesktopSidebar: React.FC<Props> = ({ workspace, className }) => {
113114
tag: <Tag label="internal" />,
114115
hidden: !workspace.features.successPage,
115116
},
117+
{
118+
icon: DatabaseZap,
119+
href: "/semantic-cache",
120+
label: "Semantic Cache",
121+
active: segments.at(0) === "semantic-cache",
122+
hidden: true,
123+
},
116124
].filter((n) => !n.hidden);
117125

118126
const firstOfNextMonth = new Date();

apps/dashboard/app/(app)/mobile-sidebar.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { Button } from "@/components/ui/button";
33
import { Sheet, SheetClose, SheetContent, SheetHeader, SheetTrigger } from "@/components/ui/sheet";
44
import { cn } from "@/lib/utils";
55
import { SignOutButton } from "@clerk/nextjs";
6-
import { BookOpen, FileJson, LogOut, Menu, Settings } from "lucide-react";
6+
import { BookOpen, DatabaseZap, FileJson, LogOut, Menu, Settings } from "lucide-react";
77
import Link from "next/link";
88
import { useRouter } from "next/navigation";
99
import { WorkspaceSwitcher } from "./team-switcher";
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { PageHeader } from "@/components/dashboard/page-header";
2+
import { Separator } from "@/components/ui/separator";
3+
4+
import {
5+
Table,
6+
TableBody,
7+
TableCaption,
8+
TableCell,
9+
TableHead,
10+
TableHeader,
11+
TableRow,
12+
} from "@/components/ui/table";
13+
14+
import { getAllSemanticCacheLogs } from "@/lib/tinybird";
15+
16+
/**
 * Dashboard page for the semantic cache.
 *
 * Requests log rows via `getAllSemanticCacheLogs` with `limit: 10` and renders
 * them in a table with one row per log entry, keyed by its `requestId`.
 */
export default async function SemanticCachePage() {
  const { data: logs } = await getAllSemanticCacheLogs({ limit: 10 });

  // One table row per log entry; built up-front so the returned markup stays flat.
  const rows = logs.map((log) => (
    <TableRow key={log.requestId}>
      <TableCell className="font-medium">{log.timestamp}</TableCell>
      <TableCell>{log.model}</TableCell>
      <TableCell>{log.cache}</TableCell>
      <TableCell>{log.query}</TableCell>
      <TableCell>{log.response}</TableCell>
      <TableCell>{log.requestId}</TableCell>
      <TableCell>{log.timing}</TableCell>
    </TableRow>
  ));

  return (
    <div>
      <PageHeader
        title="Semantic Cache"
        description="Faster, cheaper LLM API calls through semantic caching"
      />
      <Separator className="my-6" />
      <h1 className="font-medium">Logs</h1>
      <Table className="mt-4">
        <TableCaption>View real-time logs from the semantic cache.</TableCaption>
        <TableHeader>
          <TableRow>
            <TableHead>Time</TableHead>
            <TableHead>Model</TableHead>
            <TableHead>Cache status</TableHead>
            <TableHead>Query</TableHead>
            <TableHead>Response</TableHead>
            <TableHead>Request ID</TableHead>
            <TableHead>Request timing</TableHead>
          </TableRow>
        </TableHeader>
        <TableBody>{rows}</TableBody>
      </Table>
    </div>
  );
}

apps/dashboard/lib/tinybird.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,28 @@ export const getRatelimitEvents = tb.buildPipe({
721721
},
722722
});
723723

724+
/**
 * Pipe built with `tb.buildPipe` for the Tinybird pipe
 * `get_all_semantic_cache_logs__v1`.
 *
 * - `parameters`: an optional numeric `limit`.
 * - `data`: the zod shape of a single returned log row.
 * - `opts`: passes `cache: "no-store"` (presumably the fetch cache mode — confirm
 *   against the Tinybird client wrapper).
 *
 * NOTE(review): `stream` and `cache` are declared as numbers here, while the
 * ingest-side `eventSchema` in apps/semantic-cache declares them as booleans;
 * presumably they come back as 0/1 — confirm.
 */
export const getAllSemanticCacheLogs = tb.buildPipe({
725+
pipe: "get_all_semantic_cache_logs__v1",
726+
parameters: z.object({
727+
limit: z.number().optional(),
728+
}),
729+
data: z.object({
730+
timestamp: z.string(),
731+
model: z.string(),
732+
stream: z.number(),
733+
query: z.string(),
734+
vector: z.array(z.number()),
735+
response: z.string(),
736+
cache: z.number(),
737+
timing: z.number(),
738+
tokens: z.number(),
739+
requestId: z.string(),
740+
}),
741+
opts: {
742+
cache: "no-store",
743+
},
744+
});
745+
724746
// public get getVerificationsByOwnerId() {
725747
// return this.client.buildPipe({
726748
// pipe: "get_verifictions_by_keySpaceId__v1",

apps/semantic-cache/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
dist
2+
node_modules
3+
.env
4+
scripts
5+
.dev.vars

apps/semantic-cache/lib/cache.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { CloudflareStore, type MemoryStore, Namespace, createCache } from "@unkey/cache";
2+
import type { Context } from "hono";
3+
4+
import type { LLMResponse } from "../types";
5+
6+
/**
 * Creates the cache used for LLM responses.
 *
 * The single `response` namespace is backed by two stores, in this order: the
 * caller-supplied `memory` store, then a `CloudflareStore` configured from
 * `c.env` for the domain `cache.unkey.dev`.
 *
 * @param c - Hono context; supplies the execution context and the
 *   `CLOUDFLARE_API_KEY` / `CLOUDFLARE_ZONE_ID` bindings.
 * @param memory - Memory store placed first in the store list.
 * @returns The cache produced by `createCache`.
 */
export async function initCache(c: Context, memory: MemoryStore<string, any>) {
  // Read the execution context before anything else, as the namespace needs it.
  const executionCtx = c.executionCtx;

  const remoteStore = new CloudflareStore({
    cloudflareApiKey: c.env.CLOUDFLARE_API_KEY,
    zoneId: c.env.CLOUDFLARE_ZONE_ID,
    domain: "cache.unkey.dev",
  });

  return createCache({
    response: new Namespace<LLMResponse>(executionCtx, {
      stores: [memory, remoteStore],
      // NOTE(review): presumably milliseconds — confirm against @unkey/cache.
      fresh: 6_000_000,
      stale: 300_000_000,
    }),
  });
}

apps/semantic-cache/lib/streaming.ts

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
 * Wraps a byte `ReadableStream` and accumulates its content as text.
 *
 * Constructing an instance immediately acquires a reader on the stream (which
 * locks it). `readToEnd` drains the reader into `data`; `checkComplete` then
 * reports whether the accumulated text contains the `[DONE]` marker.
 */
export class ManagedStream {
  stream: ReadableStream;
  reader: ReadableStreamDefaultReader<Uint8Array>;
  /** True once the reader has reported the end of the stream without error. */
  isDone: boolean;
  /** All text decoded from the stream so far. */
  data: string;
  /** Set by `checkComplete` when `data` contains `[DONE]`. */
  isComplete: boolean;

  constructor(stream: ReadableStream) {
    this.stream = stream;
    this.reader = this.stream.getReader();
    this.isDone = false;
    this.data = "";
    this.isComplete = false;
  }

  /**
   * Reads the stream until it ends, appending the decoded text to `data`.
   *
   * Read errors are logged rather than rethrown; the reader lock is released
   * in every case.
   *
   * @returns `true` if the stream was read to its end, `false` if reading failed.
   */
  async readToEnd(): Promise<boolean> {
    // A single decoder is shared across all chunks: with `stream: true` it
    // buffers an incomplete multi-byte UTF-8 sequence at a chunk boundary
    // instead of emitting replacement characters for each half.
    const decoder = new TextDecoder();
    try {
      while (true) {
        const { done, value } = await this.reader.read();
        if (done) {
          // Flush whatever the decoder is still holding back.
          this.data += decoder.decode();
          this.isDone = true;
          break;
        }
        this.data += decoder.decode(value, { stream: true });
      }
    } catch (error) {
      console.error("Stream error:", error);
      this.isDone = false;
    } finally {
      this.reader.releaseLock();
    }
    return this.isDone;
  }

  /** Marks the stream as complete if the accumulated text contains `[DONE]`. */
  checkComplete() {
    if (this.data.includes("[DONE]")) {
      this.isComplete = true;
    }
  }

  /** Returns the reader acquired in the constructor. */
  getReader() {
    return this.reader;
  }

  /** Returns the text accumulated so far. */
  getData() {
    return this.data;
  }
}

apps/semantic-cache/package.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"name": "@unkey/semantic-cache",
3+
"version": "1.0.0",
4+
"license": "AGPL-3.0",
5+
"private": true,
6+
"keywords": ["unkey", "semantic", "cache", "ai"],
7+
"bugs": {
8+
"url": "https://github.com/unkeyed/unkey/issues"
9+
},
10+
"homepage": "https://github.com/unkeyed/unkey#readme",
11+
"author": "Dominic Eccleston [email protected]",
12+
"scripts": {
13+
"dev": "wrangler dev src/worker.ts"
14+
},
15+
"devDependencies": {
16+
"@cloudflare/workers-types": "^4.20240403.0",
17+
"@types/node": "^20.12.7",
18+
"dotenv": "^16.4.5",
19+
"typescript": "^5.3.3",
20+
"wrangler": "^3.47.0"
21+
},
22+
"dependencies": {
23+
"@chronark/zod-bird": "^0.3.9",
24+
"@cloudflare/ai": "^1.1.0",
25+
"@unkey/cache": "workspace:^",
26+
"@unkey/logs": "workspace:^",
27+
"@unkey/metrics": "workspace:^",
28+
"ai": "^3.0.23",
29+
"hono": "^4.2.7",
30+
"nanoid": "^5.0.7",
31+
"openai": "^4.35.0",
32+
"superjson": "^2.2.1",
33+
"zod": "^3.23.5"
34+
}
35+
}

apps/semantic-cache/src/analytics.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import { Tinybird } from "@chronark/zod-bird";
2+
import { z } from "zod";
3+
4+
/**
 * Zod schema for one semantic-cache log event.
 *
 * Passed as the `event` schema to the ingest endpoint built by
 * `Analytics.ingestLogs`.
 */
export const eventSchema = z.object({
5+
timestamp: z.string(),
6+
model: z.string(),
7+
stream: z.boolean(),
8+
query: z.string(),
9+
vector: z.array(z.number()),
10+
response: z.string(),
11+
cache: z.boolean(),
12+
timing: z.number(),
13+
tokens: z.number(),
14+
requestId: z.string(),
15+
});
16+
17+
/**
 * Holds a Tinybird client and exposes the ingest endpoint used for
 * semantic-cache log events.
 */
export class Analytics {
  public readonly client: Tinybird;

  /**
   * @param opts.tinybirdToken - Token handed to the Tinybird client.
   */
  constructor(opts: { tinybirdToken: string }) {
    const { tinybirdToken } = opts;
    this.client = new Tinybird({ token: tinybirdToken });
  }

  /**
   * Ingest endpoint for the `semantic_cache__v3` datasource, validating events
   * against `eventSchema`. A new endpoint is built on every access.
   */
  public get ingestLogs() {
    const endpoint = this.client.buildIngestEndpoint({
      datasource: "semantic_cache__v3",
      event: eventSchema,
    });
    return endpoint;
  }
}

0 commit comments

Comments
 (0)