-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Basic instrumentation of Cloudflare bindings (AI, KV, R2, D1) (#196)
* Prototype proxying cloudflare bindings (AI and R2) * Update comment on measuredBinding * Fix typescript error * Add goosify sample app with every cf binding imaginable * Set up local D1 with drizzle * Return geese from goosify (and format code) * Try implementing a d1 binding * Remove cl * Format * Rename some attrs * Start adding more attributes depending on the args to the binding, etc * Proxy kv * Finish D1 instrumentation * Format * Clean up cf binding code and move to patch module * Use constants for cf binding attributes * Fix ts-expect-error * Try recording non-binary responses * Add a German Gans * Add showcase request generation app * Add function calling cloudflare ai example * Rename sample-apps to examples * Update ai request generation example * Turn off cloudflare binding instrumentation by default, and add it back in for the example apps * Update dev vars example for goosify
- Loading branch information
Showing
44 changed files
with
1,731 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
FPX_ENDPOINT=http://localhost:8788/v1/traces |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Request Parameter Generation with Hermes | ||
|
||
This example shows how to use a Cloudflare Workers AI model that supports function calling in order to generate request parameters for a Hono route. It uses the [Hermes](https://huggingface.co/nousresearch/hermes-2-pro-mistral-7b) model, which is a Mistral-based model that supports function calling. | ||
|
||
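Fiberplane Studio is used to add timing information to the request. The Cloudflare `AI` binding is instrumented because `src/index.ts` passes `cfBindings: true` in the `monitor` options given to `instrument`; binding instrumentation is turned off by default. | ||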
|
||
## Running Locally | ||
|
||
You will need a Cloudflare account in order to run this locally, since AI inference is billed. | ||
|
||
```sh | ||
pnpm i | ||
pnpm dev | ||
``` | ||
|
||
Then, you can inspect the request and response in Fiberplane Studio. | ||
|
||
```sh | ||
npx @fiberplane/studio | ||
``` | ||
|
||
Test one of the following JSON request bodies against the `POST /` route, and you'll see structured output describing a sample HTTP request. | ||
|
||
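You can adjust inference parameters such as `temperature` through the URL query string, for example `POST /?temperature=0.5`. When `temperature` is omitted or is not a number, the route falls back to `0.12`. | ||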
|
||
```json | ||
{ | ||
"prompt": "GET /users/:id" | ||
} | ||
``` | ||
|
||
```json | ||
{ | ||
  "prompt": "POST /users" | ||
} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{ | ||
"name": "request-generation-showcase", | ||
"scripts": { | ||
"dev": "wrangler dev src/index.ts", | ||
"deploy": "wrangler deploy --minify src/index.ts" | ||
}, | ||
"dependencies": { | ||
"@fiberplane/hono-otel": "workspace:*", | ||
"@langchain/core": "^0.2.18", | ||
"hono": "^4.5.9" | ||
}, | ||
"devDependencies": { | ||
"@cloudflare/workers-types": "^4.20240821.1", | ||
"wrangler": "^3.72.3" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
import { instrument } from "@fiberplane/hono-otel"; | ||
import { Hono } from "hono"; | ||
import { getSystemPrompt } from "./prompts"; | ||
import { makeRequestToolHermes } from "./tools"; | ||
|
||
/** Environment bindings exposed to route handlers on `c.env`. */
type Bindings = {
  /**
   * Cloudflare Workers AI binding.
   *
   * Enabled in wrangler.toml with:
   *
   *   [ai]
   *   binding = "AI"
   */
  AI: Ai;
  // NOTE(review): declared here but not read anywhere in the visible code.
  DATABASE_URL: string;
};

// The Hono app; the generic makes `c.env` typed as `Bindings` in handlers.
const app = new Hono<{ Bindings: Bindings }>();
|
||
/**
 * `POST /` - asks the model to describe an HTTP request for a given prompt.
 *
 * Expects a JSON body shaped like `{ "prompt": string }` and an optional
 * `temperature` query parameter (falls back to 0.12). On success, responds
 * with the arguments of the first tool call returned by the model.
 *
 * Error responses:
 *  - 400 when the body is not valid JSON or has no non-empty string `prompt`
 *  - 500 when the inference result has an unexpected shape
 */
app.post("/", async (c) => {
  const temperature = parseTemperature(c.req.query("temperature"), 0.12);

  // Reject malformed input before spending a (billed) inference call on it.
  let body: { prompt?: unknown } | null;
  try {
    body = await c.req.json<{ prompt?: unknown } | null>();
  } catch {
    return c.json(
      {
        message: "Request body must be valid JSON",
      },
      400,
    );
  }

  const prompt = body?.prompt;
  if (typeof prompt !== "string" || prompt.trim() === "") {
    return c.json(
      {
        message: 'Request body must include a non-empty string "prompt"',
      },
      400,
    );
  }

  const inferenceResult = await runInference(c.env.AI, prompt, temperature);

  // We are not using streaming outputs, but just in case, handle the stream here
  if (inferenceResult instanceof ReadableStream) {
    return c.json(
      {
        message: "Unexpected inference result (stream)",
      },
      500,
    );
  }

  // We are theoretically enforcing a tool call, so this should not happen
  if (inferenceResult.response != null) {
    return c.json(
      {
        message: "Unexpected inference result (text)",
      },
      500,
    );
  }

  // Parse the tool call: only the first call is considered
  const makeRequestCall = inferenceResult.tool_calls?.[0];
  const requestDescriptor = makeRequestCall?.arguments;

  // TODO - Validate the request descriptor against the JSON Schema from the tool definition
  if (!isObjectGuard(requestDescriptor)) {
    return c.json(
      {
        message: "Invalid request descriptor",
      },
      500,
    );
  }

  console.log("requestDescriptor", JSON.stringify(requestDescriptor, null, 2));

  return c.json(requestDescriptor);
});
|
||
// Monitoring switches handed to the Fiberplane instrumentation wrapper.
// `cfBindings` is what turns on instrumentation of the Cloudflare bindings
// (such as `AI`) for this example.
const monitorOptions = {
  fetch: true,
  logging: true,
  cfBindings: true,
};

// The worker's default export is the instrumented Hono app.
export default instrument(app, { monitor: monitorOptions });
|
||
/**
 * Runs the Hermes function-calling model through the Workers AI binding.
 *
 * The call offers `makeRequestToolHermes` as the only tool and sets
 * `tool_choice` to that tool. The system prompt comes from
 * `getSystemPrompt("QA")`; the user prompt is sent in the `prompt` field
 * rather than as a `user` message (see the notes inside the call).
 *
 * @param client - The Workers AI binding used to run the model.
 * @param userPrompt - Text describing the request to generate.
 * @param temperature - Sampling temperature forwarded to the model.
 * @returns The raw result of `client.run`, asserted to be an
 *   `AiTextGenerationOutput`. Callers still have to check whether it is a
 *   stream, a text response, or a tool call.
 */
export async function runInference(
  client: Ai,
  userPrompt: string,
  temperature: number,
): Promise<AiTextGenerationOutput> {
  const result = await client.run(
    // @ts-ignore - This model exists in the Worker types as far as I can tell
    //              I don't know why it's causing a typescript error here :(
    "@hf/nousresearch/hermes-2-pro-mistral-7b",
    {
      tools: [makeRequestToolHermes],
      // Restrict to only using this "make request" tool
      tool_choice: {
        type: "function",
        function: { name: makeRequestToolHermes.name },
      },

      messages: [
        {
          role: "system",
          content: getSystemPrompt("QA"),
        },
        // TODO - File issue on the Cloudflare docs repo
        //        Since this example did not work!
        //
        // {
        //   role: "user",
        //   content: userPrompt,
        // },
      ],
      temperature,

      // NOTE - The request will fail if you don't put the prompt here
      prompt: userPrompt,
    },
  );

  // HACK - Need to coerce this to a AiTextGenerationOutput
  return result as AiTextGenerationOutput;
}
|
||
/**
 * Parses the `temperature` query parameter into a usable number.
 *
 * @param strTemperature - Raw query-string value; `undefined` when absent.
 * @param fallback - Value returned whenever the input is unusable.
 * @returns The parsed temperature, or `fallback` when the input is missing,
 *   empty, not a number, not finite, or negative.
 */
function parseTemperature(
  strTemperature: string | undefined,
  fallback: number,
): number {
  if (!strTemperature) {
    return fallback;
  }

  // parseFloat is lenient: it accepts "Infinity" and overflowing literals
  // such as "1e999", so a NaN check alone is not enough. Number.isFinite
  // rejects NaN and both infinities in one test.
  const temperature = Number.parseFloat(strTemperature);
  if (!Number.isFinite(temperature) || temperature < 0) {
    return fallback;
  }

  return temperature;
}
|
||
/**
 * Type guard for non-null values whose `typeof` is `"object"`.
 *
 * Arrays pass this check as well; functions and primitives do not.
 */
function isObjectGuard(value: unknown): value is object {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
Oops, something went wrong.