From d13f518a11993f2255582434ec27c8eaf4b3de73 Mon Sep 17 00:00:00 2001
From: Neko Ayaka
Date: Thu, 26 Dec 2024 03:23:40 +0800
Subject: [PATCH] fix: vad & force json format

---
 cspell.config.yaml                       |  1 +
 packages/stage/src/composables/micvad.ts |  6 --
 packages/stage/src/utils/jsonFormat.ts   | 90 ++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 6 deletions(-)
 create mode 100644 packages/stage/src/utils/jsonFormat.ts

diff --git a/cspell.config.yaml b/cspell.config.yaml
index cba56c1..5a582e3 100644
--- a/cspell.config.yaml
+++ b/cspell.config.yaml
@@ -70,6 +70,7 @@ words:
   - vrma
   - vueuse
   - webgpu
+  - worklet
   - xsai
 ignoreWords: []
 import: []
diff --git a/packages/stage/src/composables/micvad.ts b/packages/stage/src/composables/micvad.ts
index 48d50ab..3151e57 100644
--- a/packages/stage/src/composables/micvad.ts
+++ b/packages/stage/src/composables/micvad.ts
@@ -11,12 +11,6 @@ export function useMicVAD(deviceId: MaybeRef, op
     positiveSpeechThreshold: 0.5, // default is 0.5
     negativeSpeechThreshold: 0.5 - 0.15, // default is 0.5 - 0.15
     minSpeechFrames: 15, // default is 9
-    // WORKAROUND: temporary workaround for onnxruntime-web, since @ricky0123/vad-web
-    // uses hardcoded version of onnxruntime-web@1.14.0 to fetch the already non-existing
-    // ort-wasm-simd-threaded.mjs file and its WASM binary, we are going to force
-    // the onnxruntime-web to use the latest version of onnxruntime-web from jsdelivr
-    // to fetch the correct ort-wasm-simd-threaded.wasm binary
-    onnxWASMBasePath: 'https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/',
     auto: true,
   })
 
diff --git a/packages/stage/src/utils/jsonFormat.ts b/packages/stage/src/utils/jsonFormat.ts
new file mode 100644
index 0000000..7685550
--- /dev/null
+++ b/packages/stage/src/utils/jsonFormat.ts
@@ -0,0 +1,90 @@
+import type { Infer, Schema } from '@typeschema/valibot'
+import type { CommonProviderOptions } from '@xsai/providers'
+import type { Message } from '@xsai/shared-chat'
+
+import { toJSONSchema, validate } from '@typeschema/valibot'
+import { generateText } from '@xsai/generate-text'
+import { user } from '@xsai/shared-chat'
+
+type SchemaOrString<S> = S extends undefined ? string : S extends Schema ? Infer<S> : never
+
+async function parseJSONFormat<S extends Schema | undefined = undefined, R = SchemaOrString<S>>(content: string, options: { messages: Message[], apiKey?: string, baseURL: string, model: string } & CommonProviderOptions, schema?: S, erroredValue?: string, errorMessage?: string): Promise<R> {
+  if (!schema)
+    return content as unknown as R
+
+  try {
+    let parsedContent: Infer<NonNullable<S>>
+    let correctionPrompt = ''
+
+    if (erroredValue && errorMessage) {
+      correctionPrompt = `Previous response "${JSON.stringify(erroredValue)}" was invalid due to: ${JSON.stringify(errorMessage)}\n\n`
+    }
+
+    try {
+      parsedContent = JSON.parse(content)
+    }
+    catch (parseError) {
+      console.error('Error parsing JSON:', parseError, content)
+
+      options.messages.push(user(`
+${correctionPrompt}The response was not valid JSON:
+${JSON.stringify(content)}
+
+Error: ${String(parseError)}
+
+Please provide a corrected JSON response that matches the schema:
+${JSON.stringify(await toJSONSchema(schema))}`))
+
+      const response = await call(options, schema)
+      return parseJSONFormat(response, options, schema, content, String(parseError))
+    }
+
+    const validation = await validate(schema, parsedContent)
+    if (validation.success) {
+      return parsedContent as R
+    }
+
+    console.error('Schema validation failed:', validation.issues, parsedContent)
+    options.messages.push(user(`
+${correctionPrompt}The response failed schema validation:
+${JSON.stringify(parsedContent)}
+
+Validation errors:
+${validation.issues.map(issue => `- ${issue.message}`).join('\n')}
+
+Please provide a corrected response that matches the schema:
+${JSON.stringify(await toJSONSchema(schema))}`))
+
+    const response = await call(options, schema)
+    return parseJSONFormat(response, options, schema, JSON.stringify(parsedContent), validation.issues.map(i => i.message).join(', '))
+  }
+  catch (error) {
+    console.error('Error processing response:', error)
+    throw error
+  }
+}
+
+/**
+ * Pushes the schema instruction into the conversation, asks the model for a completion, and parses the reply against the optional schema.
+ */
+async function call<S extends Schema | undefined = undefined, R = SchemaOrString<S>>(options: { messages: Message[], apiKey?: string, baseURL: string, model: string } & CommonProviderOptions, schema?: S): Promise<R> {
+  if (schema != null) {
+    options.messages.push(user(`Your response must follow the following schema:
+${JSON.stringify(await toJSONSchema(schema))}
+
+Without any extra markup such as \`\`\` in markdown, or descriptions.`))
+  }
+
+  const response = await generateText({
+    baseURL: options.baseURL,
+    apiKey: options.apiKey,
+    model: options.model,
+    messages: options.messages,
+  })
+
+  return await parseJSONFormat(response.text || '', options, schema)
+}
+
+export async function generateObject<S extends Schema | undefined = undefined, R = SchemaOrString<S>>(options: { messages: Message[], model: string, apiKey?: string, baseURL: string } & CommonProviderOptions, schema?: S): Promise<R> {
+  return await call(options, schema)
+}
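
Usage note (a minimal sketch, not part of the patch): how the new generateObject helper could be driven end-to-end with a valibot schema. The schema shape, endpoint, model name, and API key below are illustrative assumptions, not values taken from this change.

import * as v from 'valibot'

import { user } from '@xsai/shared-chat'

import { generateObject } from './jsonFormat'

// Hypothetical schema for illustration; any valibot schema that
// @typeschema/valibot can convert to JSON Schema works here.
const moodSchema = v.object({
  mood: v.string(),
  intensity: v.number(),
})

// generateObject appends the schema instruction to the messages, calls the
// model via generateText, and re-prompts with the exact parse or validation
// errors until the reply conforms, so the awaited value is typed as
// Infer<typeof moodSchema>.
const mood = await generateObject({
  baseURL: 'https://api.openai.com/v1/', // assumed OpenAI-compatible endpoint
  apiKey: 'sk-placeholder', // placeholder, supply a real key
  model: 'gpt-4o-mini', // illustrative model name
  messages: [user('Describe your current mood as JSON.')],
}, moodSchema)

console.log(mood.mood, mood.intensity)

Because the correction loop feeds the model its own invalid output plus the concrete error, it should converge on schema-conforming JSON without provider-side JSON mode, at the cost of extra round-trips on failure.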