/* eslint-disable no-restricted-globals */
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@latest'
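// Singleton wrapper around a transformers.js text-generation pipeline.
// It tries to initialize on WebGPU first and falls back to WASM, and it
// caches the loaded pipeline so repeated requests for the same model and
// dtype reuse the existing instance instead of rebuilding it.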
class MyTextGenerationPipeline {
static task = 'text-generation'
  static instance = null
  static currentGeneration = null
  static model = null
  static dtype = null
  static async getInstance(model, dtype = 'fp32', progress_callback = null) {
    // Reuse the cached pipeline when the same model/dtype is already loaded
    if (this.instance && this.model === model && this.dtype === dtype) {
      return this.instance
    }
    // Clear the stale instance first so a failed load is not cached
    this.instance = null
    this.model = model
    this.dtype = dtype
    try {
      // Try WebGPU first
      this.instance = await pipeline(this.task, model, {
        dtype,
        device: 'webgpu',
        progress_callback
      })
      return this.instance
} catch (webgpuError) {
// Fallback to WASM if WebGPU fails
if (progress_callback) {
progress_callback({
status: 'fallback',
message: 'WebGPU failed, falling back to WASM'
})
}
try {
this.instance = await pipeline(this.task, model, {
dtype,
device: 'wasm',
progress_callback
})
return this.instance
} catch (wasmError) {
throw new Error(
`Both WebGPU and WASM failed. WebGPU error: ${webgpuError.message}. WASM error: ${wasmError.message}`
)
}
}
}
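  // Abort any in-flight generation (triggered by a 'stop' message)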
static stopGeneration() {
if (this.currentGeneration) {
this.currentGeneration.abort()
this.currentGeneration = null
}
}
}
// Listen for messages from the main thread. Supported message types:
//   'load'     - download and initialize the model, then report 'ready'
//   'generate' - run generation on the given messages (chat) or prompt (raw)
//   'stop'     - abort any in-flight generation
self.addEventListener('message', async (event) => {
try {
    const { type, model, dtype, messages, prompt, hasChatTemplate, config = {} } =
      event.data
if (type === 'stop') {
MyTextGenerationPipeline.stopGeneration()
self.postMessage({ status: 'ready' })
return
}
if (!model) {
self.postMessage({
status: 'error',
output: 'No model provided'
})
return
}
// Retrieve the pipeline. This will download the model if not already cached.
const generator = await MyTextGenerationPipeline.getInstance(
model,
dtype,
(x) => {
self.postMessage({ status: 'loading', output: x })
}
)
if (type === 'load') {
self.postMessage({
status: 'ready',
output: `Model ${model}, dtype ${dtype} loaded`
})
return
}
if (type === 'generate') {
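      // Chat models receive the full message array (the pipeline applies the
      // chat template); plain models receive the raw prompt string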
let inputText = ''
if (hasChatTemplate && messages && messages.length > 0) {
inputText = messages
} else if (!hasChatTemplate && prompt) {
inputText = prompt
} else {
self.postMessage({ status: 'ready' })
return
}
      const options = {
        max_new_tokens: config.max_new_tokens || 100,
        temperature: config.temperature || 0.7,
        do_sample: config.do_sample !== false,
        // Only forward top_p/top_k when they are explicitly set
        ...(config.top_p && { top_p: config.top_p }),
        ...(config.top_k && { top_k: config.top_k })
      }
      // Create an AbortController for this generation. Forwarding its signal
      // assumes the installed transformers.js build honors an AbortSignal in
      // its generation options; if it does not, 'stop' cannot interrupt an
      // in-flight generation and only clears the controller.
const abortController = new AbortController()
MyTextGenerationPipeline.currentGeneration = abortController
try {
const output = await generator(inputText, {
...options,
signal: abortController.signal
})
        if (hasChatTemplate) {
          // In chat mode, generated_text is the full message array; the last
          // entry is the newly generated assistant message
          self.postMessage({
            status: 'output',
            output: output[0].generated_text.at(-1)
          })
} else {
self.postMessage({
status: 'output',
output: {
role: 'assistant',
content: output[0].generated_text
}
})
}
self.postMessage({ status: 'ready' })
} catch (error) {
if (error.name === 'AbortError') {
self.postMessage({ status: 'ready' })
} else {
throw error
}
} finally {
MyTextGenerationPipeline.currentGeneration = null
}
}
} catch (error) {
self.postMessage({
status: 'error',
output: error.message || 'An error occurred during text generation'
})
}
})
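// Example main-thread usage (a minimal sketch: the worker file name, model
// id, and dtype below are illustrative assumptions, not part of this script):
//
//   const worker = new Worker(
//     new URL('./textGenerationWorker.js', import.meta.url),
//     { type: 'module' }
//   )
//   worker.addEventListener('message', ({ data }) => {
//     if (data.status === 'output') console.log(data.output)
//   })
//   const request = { model: 'HuggingFaceTB/SmolLM2-135M-Instruct', dtype: 'q4' }
//   worker.postMessage({ type: 'load', ...request })
//   worker.postMessage({
//     type: 'generate',
//     ...request,
//     hasChatTemplate: true,
//     messages: [{ role: 'user', content: 'Hello!' }],
//     config: { max_new_tokens: 64 }
//   })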