Add support for Style TTS2 models in code examples
Browse files
src/components/ModelCode.tsx
CHANGED
@@ -100,16 +100,31 @@ const ModelCode = ({ isCodeModalOpen, setIsCodeModalOpen }: ModelCodeProps) => {
|
|
100 |
top_k: 5
|
101 |
}
|
102 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
}
|
104 |
|
105 |
-
|
106 |
|
107 |
const ${classType} = pipeline('${pipeline}', '${modelInfo.name}', {
|
108 |
dtype: '${selectedQuantization}',
|
109 |
device: 'webgpu' // 'wasm'
|
110 |
});
|
111 |
const result = await ${classType}(${modelInfo.hasChatTemplate ? exampleData : "'" + exampleData + "'"}, ${JSON.stringify(config, null, 2)});
|
112 |
-
console.log(result);
|
113 |
`
|
114 |
|
115 |
const configPython = Object.entries(config)
|
@@ -119,12 +134,34 @@ console.log(result);
|
|
119 |
)
|
120 |
.join(', ')
|
121 |
|
122 |
-
|
123 |
|
124 |
${classType} = pipeline("${pipeline}", model="${modelInfo.name}")
|
125 |
result = ${classType}(${modelInfo.hasChatTemplate ? exampleData : '"' + exampleData + '"'}, ${configPython})
|
126 |
-
print(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
`
|
|
|
128 |
|
129 |
const copyToClipboard = (text: string) => {
|
130 |
navigator.clipboard.writeText(text)
|
@@ -132,6 +169,7 @@ print(result)
|
|
132 |
setTimeout(() => setIsCopied(false), 2000)
|
133 |
}
|
134 |
const pipelineName = pipeline
|
|
|
135 |
.split('-')
|
136 |
.map((word, index) => word.charAt(0).toUpperCase() + word.slice(1))
|
137 |
.join('')
|
@@ -144,8 +182,19 @@ print(result)
|
|
144 |
title={title}
|
145 |
maxWidth="5xl"
|
146 |
>
|
147 |
-
{/* ... (all your modal content JSX is unchanged) */}
|
148 |
<div className="text-sm max-w-none px-4">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
<div className="flex flex-row">
|
150 |
<img src="/javascript-logo.svg" className="w-6 h-6 mr-1 rounded" />
|
151 |
<h2 className="text-lg font-medium mb-2">Javascript</h2>
|
@@ -153,7 +202,7 @@ print(result)
|
|
153 |
<div className="flex flex-row items-center text-sm hover:underline text-foreground/60">
|
154 |
<Link className="h-3 w-3 mr-2" />
|
155 |
<a
|
156 |
-
href={`https://huggingface.co/docs/transformers.js/api/pipelines#pipelines${pipeline.replace(/-/g, '')}pipeline`}
|
157 |
target="_blank"
|
158 |
rel="noopener noreferrer"
|
159 |
>
|
|
|
100 |
top_k: 5
|
101 |
}
|
102 |
break
|
103 |
+
case 'text-to-speech':
|
104 |
+
classType = 'synthesizer'
|
105 |
+
exampleData =
|
106 |
+
"Life is like a box of chocolates. You never know what you're gonna get."
|
107 |
+
if (modelInfo.isStyleTTS2) {
|
108 |
+
config = {
|
109 |
+
voice: 'af_heart'
|
110 |
+
}
|
111 |
+
} else {
|
112 |
+
config = {
|
113 |
+
speaker_embeddings:
|
114 |
+
'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin'
|
115 |
+
}
|
116 |
+
}
|
117 |
+
break
|
118 |
}
|
119 |
|
120 |
+
let jsCode = `import { pipeline } from '@huggingface/transformers';
|
121 |
|
122 |
const ${classType} = pipeline('${pipeline}', '${modelInfo.name}', {
|
123 |
dtype: '${selectedQuantization}',
|
124 |
device: 'webgpu' // 'wasm'
|
125 |
});
|
126 |
const result = await ${classType}(${modelInfo.hasChatTemplate ? exampleData : "'" + exampleData + "'"}, ${JSON.stringify(config, null, 2)});
|
127 |
+
${pipeline === 'text-to-speech' ? "result.save('audio.wav')" : 'console.log(result);'}
|
128 |
`
|
129 |
|
130 |
const configPython = Object.entries(config)
|
|
|
134 |
)
|
135 |
.join(', ')
|
136 |
|
137 |
+
let pythonCode = `from transformers import pipeline
|
138 |
|
139 |
${classType} = pipeline("${pipeline}", model="${modelInfo.name}")
|
140 |
result = ${classType}(${modelInfo.hasChatTemplate ? exampleData : '"' + exampleData + '"'}, ${configPython})
|
141 |
+
${pipeline === 'text-to-speech' ? 'audio = result["audio"]' : 'print(result)'}
|
142 |
+
`
|
143 |
+
|
144 |
+
if (modelInfo.isStyleTTS2) {
|
145 |
+
jsCode = `
|
146 |
+
import { KokoroTTS } from "kokoro-js";
|
147 |
+
const tts = await KokoroTTS.from_pretrained('${modelInfo.name}', {
|
148 |
+
dtype: '${selectedQuantization}',
|
149 |
+
device: 'webgpu' // 'wasm'
|
150 |
+
});
|
151 |
+
|
152 |
+
const audio = await tts.generate("${exampleData}", ${JSON.stringify(config, null, 2)});
|
153 |
+
audio.save("audio.wav");
|
154 |
+
`
|
155 |
+
|
156 |
+
pythonCode = `!pip install -q kokoro>=0.9.4 soundfile
|
157 |
+
from kokoro import KPipeline
|
158 |
+
|
159 |
+
pipeline = KPipeline(lang_code='a')
|
160 |
+
generator = pipeline("${exampleData}", voice='af_heart')
|
161 |
+
for i, (gs, ps, audio) in enumerate(generator):
|
162 |
+
print(i, gs, ps)
|
163 |
`
|
164 |
+
}
|
165 |
|
166 |
const copyToClipboard = (text: string) => {
|
167 |
navigator.clipboard.writeText(text)
|
|
|
169 |
setTimeout(() => setIsCopied(false), 2000)
|
170 |
}
|
171 |
const pipelineName = pipeline
|
172 |
+
.replace('speech', 'audio')
|
173 |
.split('-')
|
174 |
.map((word, index) => word.charAt(0).toUpperCase() + word.slice(1))
|
175 |
.join('')
|
|
|
182 |
title={title}
|
183 |
maxWidth="5xl"
|
184 |
>
|
|
|
185 |
<div className="text-sm max-w-none px-4">
|
186 |
+
{modelInfo.isStyleTTS2 && (
|
187 |
+
<div className="flex flex-row items-center text-sm hover:underline text-foreground/60 mb-4">
|
188 |
+
<a
|
189 |
+
href={`https://github.com/hexgrad/kokoro`}
|
190 |
+
target="_blank"
|
191 |
+
rel="noopener noreferrer"
|
192 |
+
>
|
193 |
+
Check Kokoro github for more info about Style TTS2 models
|
194 |
+
</a>
|
195 |
+
</div>
|
196 |
+
)}
|
197 |
+
|
198 |
<div className="flex flex-row">
|
199 |
<img src="/javascript-logo.svg" className="w-6 h-6 mr-1 rounded" />
|
200 |
<h2 className="text-lg font-medium mb-2">Javascript</h2>
|
|
|
202 |
<div className="flex flex-row items-center text-sm hover:underline text-foreground/60">
|
203 |
<Link className="h-3 w-3 mr-2" />
|
204 |
<a
|
205 |
+
href={`https://huggingface.co/docs/transformers.js/api/pipelines#pipelines${pipeline.replace(/-/g, '').replace('speech', 'audio')}pipeline`}
|
206 |
target="_blank"
|
207 |
rel="noopener noreferrer"
|
208 |
>
|
src/components/pipelines/TextToSpeechConfig.tsx
CHANGED
@@ -30,7 +30,7 @@ const TextToSpeechConfig: React.FC<TextToSpeechConfigProps> = ({
|
|
30 |
Select Voice
|
31 |
</Label>
|
32 |
<Select
|
33 |
-
value={config.voice}
|
34 |
onValueChange={(value) =>
|
35 |
setConfig((prev) => ({
|
36 |
...prev,
|
|
|
30 |
Select Voice
|
31 |
</Label>
|
32 |
<Select
|
33 |
+
value={config.voice || ''}
|
34 |
onValueChange={(value) =>
|
35 |
setConfig((prev) => ({
|
36 |
...prev,
|