nisten committed on
Commit
fbfed4d
·
verified ·
1 Parent(s): 23f231e

Update src/worker.js

Browse files
Files changed (1) hide show
  1. src/worker.js +4 -5
src/worker.js CHANGED
@@ -29,7 +29,7 @@ import {
29
  const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
30
  let voice;
31
  const tts = await KokoroTTS.from_pretrained(model_id, {
32
- dtype: "fp16",
33
  device: "webgpu",
34
  });
35
 
@@ -78,17 +78,16 @@ const transcriber = await pipeline(
78
  await transcriber(new Float32Array(INPUT_SAMPLE_RATE)); // Compile shaders
79
 
80
  const llm_model_id = "onnx-community/Qwen3-1.7B-ONNX";
81
- const tokenizer = await AutoTokenizer.from_pretrained("Qwen/Qwen3-1.7B"); // Load tokenizer from original repo
82
  const llm = await AutoModelForCausalLM.from_pretrained(llm_model_id, {
83
- dtype: "q4f16", // This alone should make it pick model_q4f16.onnx
84
  device: "webgpu",
85
- // Don't specify model_file_name - let Transformers.js construct the path
86
  });
87
 
88
  const SYSTEM_MESSAGE = {
89
  role: "system",
90
  content:
91
- "You're a helpful and conversational voice assistant for financial managers, you have a high EQ and are great at math and behavioral finance. Keep your responses short, clear, and casual. /no_think",
92
  };
93
  await llm.generate({ ...tokenizer("x"), max_new_tokens: 1 }); // Compile shaders
94
 
 
29
  const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
30
  let voice;
31
  const tts = await KokoroTTS.from_pretrained(model_id, {
32
+ dtype: "fp32",
33
  device: "webgpu",
34
  });
35
 
 
78
  await transcriber(new Float32Array(INPUT_SAMPLE_RATE)); // Compile shaders
79
 
80
  const llm_model_id = "onnx-community/Qwen3-1.7B-ONNX";
81
+ const tokenizer = await AutoTokenizer.from_pretrained(llm_model_id);
82
  const llm = await AutoModelForCausalLM.from_pretrained(llm_model_id, {
83
+ dtype: "q4f16",
84
  device: "webgpu",
 
85
  });
86
 
87
  const SYSTEM_MESSAGE = {
88
  role: "system",
89
  content:
90
+ "You're a helpful and conversational voice assistant. Keep your responses short, clear, and casual.",
91
  };
92
  await llm.generate({ ...tokenizer("x"), max_new_tokens: 1 }); // Compile shaders
93