Basic instrumentation of Cloudflare bindings (AI, KV, R2, D1) (#196)

* Prototype proxying cloudflare bindings (AI and R2) * Update comment on measuredBinding * Fix typescript error * Add goosify sample app with every cf binding imaginable * Set up local D1 with drizzle * Return geese from goosify (and format code) * Try implementing a d1 binding * Remove cl * Format * Rename some attrs * Start adding more attributes depending on the args to the binding, etc * Proxy kv * Finish D1 instrumentation * Format * Clean up cf binding code and move to patch module * Use constants for cf binding attributes * Fix ts-expect-error * Try recording non-binary responses * Add a German Gans * Add showcase request generation app * Add function calling cloudflare ai example * Rename sample-apps to examples * Update ai request generation example * Turn off cloudflare binding instrumentation by default, and add it back in for the example apps * Update dev vars example for goosify
fiberplane · Aug 28, 2024 · c31e14a · c31e14a
1 parent f113e59
commit c31e14a
Show file tree

Hide file tree

Showing 44 changed files with 1,731 additions and 56 deletions.
diff --git a/examples/ai-request-generation/.dev.vars.example b/examples/ai-request-generation/.dev.vars.example
@@ -0,0 +1 @@
+FPX_ENDPOINT=http://localhost:8788/v1/traces
diff --git a/sample-apps/goose-quotes/.gitignore → examples/ai-request-generation/.gitignore b/sample-apps/goose-quotes/.gitignore → examples/ai-request-generation/.gitignore
diff --git a/examples/ai-request-generation/README.md b/examples/ai-request-generation/README.md
@@ -0,0 +1,36 @@
+# Request Parameter Generation with Hermes 
+
+This example shows how to use a Cloudflare Workers AI model that supports function calling in order to generate request parameters for a Hono route. It uses the [Hermes](https://huggingface.co/nousresearch/hermes-2-pro-mistral-7b) model, which is a Mistral-based model that supports function calling.
+
+Fiberplane Studio is used to add timing information to the request. Instrumentation of the Cloudflare `AI` binding is switched on by the `cfBindings: true` monitor option passed to `instrument` in `src/index.ts`.
+
+## Running Locally
+
+You will need a Cloudflare account in order to run this locally, since AI inference is billed.
+
+```sh
+pnpm i
+pnpm dev
+```
+
+Then, you can inspect the request and response in Fiberplane Studio.
+
+```sh
+npx @fiberplane/studio
+```
+
+Test one of the following JSON request bodies against the `POST /` route, and you'll see structured output describing a sample HTTP request.
+
+You can adjust the model's `temperature` through the request's query string (for example, `POST /?temperature=0.5`).
+
+```json
+{
+  "prompt": "GET /users/:id"
+}
+```
+
+```json
+{
+  "prompt": "POST /users"
+}
+```
diff --git a/examples/ai-request-generation/package.json b/examples/ai-request-generation/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "request-generation-showcase",
+  "scripts": {
+    "dev": "wrangler dev src/index.ts",
+    "deploy": "wrangler deploy --minify src/index.ts"
+  },
+  "dependencies": {
+    "@fiberplane/hono-otel": "workspace:*",
+    "@langchain/core": "^0.2.18",
+    "hono": "^4.5.9"
+  },
+  "devDependencies": {
+    "@cloudflare/workers-types": "^4.20240821.1",
+    "wrangler": "^3.72.3"
+  }
+}
diff --git a/examples/ai-request-generation/src/index.ts b/examples/ai-request-generation/src/index.ts
@@ -0,0 +1,132 @@
+import { instrument } from "@fiberplane/hono-otel";
+import { Hono } from "hono";
+import { getSystemPrompt } from "./prompts";
+import { makeRequestToolHermes } from "./tools";
+
+/**
+ * Environment bindings exposed to route handlers through `c.env`.
+ */
+type Bindings = {
+  // NOTE(review): not read anywhere in the visible part of this example — confirm it is still needed
+  DATABASE_URL: string;
+  // Cloudflare Workers AI binding
+  // enabled in wrangler.toml with:
+  //
+  // > [ai]
+  // > binding = "AI"
+  AI: Ai;
+};
+
+const app = new Hono<{ Bindings: Bindings }>();
+
+/**
+ * POST / — asks the model to describe an HTTP request for the route given in
+ * the JSON body's `prompt` field, and responds with the tool-call arguments.
+ *
+ * The optional `temperature` query parameter overrides the default of 0.12.
+ * Responds with 400 when the body is not valid JSON or lacks a non-empty
+ * string `prompt`, and with 500 when the inference result is not the
+ * expected tool call.
+ */
+app.post("/", async (c) => {
+  const temperature = parseTemperature(c.req.query("temperature"), 0.12);
+
+  // The body is untrusted input: reject anything unusable before spending a
+  // (billed) inference call on it.
+  let body: unknown;
+  try {
+    body = await c.req.json();
+  } catch {
+    return c.json(
+      {
+        message: "Request body must be valid JSON",
+      },
+      400,
+    );
+  }
+
+  const prompt =
+    isObjectGuard(body) && "prompt" in body ? body.prompt : undefined;
+  if (typeof prompt !== "string" || prompt.trim() === "") {
+    return c.json(
+      {
+        message: 'Request body must contain a non-empty "prompt" string',
+      },
+      400,
+    );
+  }
+
+  const inferenceResult = await runInference(c.env.AI, prompt, temperature);
+
+  // We are not using streaming outputs, but just in case, handle the stream here
+  if (inferenceResult instanceof ReadableStream) {
+    return c.json(
+      {
+        message: "Unexpected inference result (stream)",
+      },
+      500,
+    );
+  }
+
+  // We are theoretically enforcing a tool call, so this should not happen
+  if (inferenceResult.response != null) {
+    return c.json(
+      {
+        message: "Unexpected inference result (text)",
+      },
+      500,
+    );
+  }
+
+  // Parse the tool call: only the first call's arguments are used
+  const makeRequestCall = inferenceResult.tool_calls?.[0];
+  const requestDescriptor = makeRequestCall?.arguments;
+
+  // TODO - Validate the request descriptor against the JSON Schema from the tool definition
+  if (!isObjectGuard(requestDescriptor)) {
+    return c.json(
+      {
+        message: "Invalid request descriptor",
+      },
+      500,
+    );
+  }
+
+  console.log("requestDescriptor", JSON.stringify(requestDescriptor, null, 2));
+
+  return c.json(requestDescriptor);
+});
+
+// Export the app wrapped by `instrument`, with the `fetch`, `logging` and
+// `cfBindings` monitor options all explicitly set to true for this example.
+export default instrument(app, {
+  monitor: {
+    fetch: true,
+    logging: true,
+    cfBindings: true,
+  },
+});
+
+/**
+ * Runs the Hermes 2 Pro model through `client.run`, offering
+ * `makeRequestToolHermes` as the only tool and naming it in `tool_choice`.
+ *
+ * @param client - Workers AI binding used to run the model.
+ * @param userPrompt - Sent as the `prompt` field rather than as a user message
+ *   (see the notes inside the options object).
+ * @param temperature - Passed through unchanged as the `temperature` option.
+ * @returns The value resolved by `client.run`, type-asserted to
+ *   `AiTextGenerationOutput` without any runtime check.
+ */
+export async function runInference(
+  client: Ai,
+  userPrompt: string,
+  temperature: number,
+) {
+  const result = await client.run(
+    // @ts-ignore - This model exists in the Worker types as far as I can tell
+    //              I don't know why it's causing a typescript error here :(
+    "@hf/nousresearch/hermes-2-pro-mistral-7b",
+    {
+      tools: [makeRequestToolHermes],
+      // Restrict to only using this "make request" tool
+      tool_choice: {
+        type: "function",
+        function: { name: makeRequestToolHermes.name },
+      },
+
+      messages: [
+        {
+          role: "system",
+          content: getSystemPrompt("QA"),
+        },
+        // TODO - File issue on the Cloudflare docs repo
+        //        Since this example did not work!
+        //
+        // {
+        //   role: "user",
+        //   content: userPrompt,
+        // },
+      ],
+      temperature,
+
+      // NOTE - The request will fail if you don't put the prompt here
+      prompt: userPrompt,
+    },
+  );
+
+  // HACK - Need to coerce this to a AiTextGenerationOutput
+  return result as AiTextGenerationOutput;
+}
+
+/**
+ * Parses a temperature given as a query-string value.
+ *
+ * @param strTemperature - Raw query value; `undefined` or `""` means "not provided".
+ * @param fallback - Value returned when nothing usable was provided.
+ * @returns The parsed number, or `fallback` when the input is missing, is not
+ *   numeric, or parses to a non-finite value such as `Infinity`.
+ */
+function parseTemperature(
+  strTemperature: string | undefined,
+  fallback: number,
+): number {
+  if (!strTemperature) {
+    return fallback;
+  }
+
+  const temperature = Number.parseFloat(strTemperature);
+  // `Number.isFinite` rejects NaN as well as +/-Infinity, neither of which is
+  // a meaningful temperature to hand to the model.
+  if (!Number.isFinite(temperature)) {
+    return fallback;
+  }
+
+  return temperature;
+}
+
+/**
+ * Type guard that accepts any non-null value whose `typeof` is "object"
+ * (arrays included) and rejects everything else.
+ */
+function isObjectGuard(value: unknown): value is object {
+  if (value === null) {
+    return false;
+  }
+  return typeof value === "object";
+}