<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Pyodide demo: GPT-2 tokenizer in the browser</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <script src="https://cdn.jsdelivr.net/pyodide/v0.19.1/full/pyodide.js"></script>
</head>
<body>
  <main class="container mx-auto px-4">
    <h1 class="text-3xl mt-6 font-bold"> 🚚 Pyodide demo </h1>
    <h2 class="text-gray">Python implementation of GPT-2 Tokenizer running inside your browser</h2>
    <div class="text-sm text-gray-800">Open your browser console to see Pyodide output</div>
    <!-- Initialization log; the script appends one line per completed step. -->
    <div class="text-xs text-green-800 mt-4 js-init" role="status">Initialization: ...</div>

    <div class="flex gap-6 mt-10">
      <div class="flex-1 bg-gray-50 p-4 rounded-xl border border-gray-200/60">
        <label class="sr-only" for="sentence-input">Sentence to tokenize</label>
        <input class="js-input w-full py-3 px-6" id="sentence-input" type="text" value="This text is transformed into tokens" placeholder="Enter your sentence…">
        <div class="flex gap-4 my-4">
          <button class="js-clear bg-gray-50 flex-1 p-3 rounded font-semibold focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500" type="button">Clear</button>
          <button class="js-submt bg-indigo-200 flex-1 p-3 rounded font-semibold focus:outline-none focus-visible:ring-2 focus-visible:ring-indigo-500" type="button">Submit</button>
        </div>
      </div>
      <div class="flex-1 bg-gray-50 p-4 rounded-xl border border-gray-200/60">
        <label class="sr-only" for="tokens-output">Output</label>
        <textarea class="js-output w-full py-3 px-6 font-mono" id="tokens-output" placeholder="Output"></textarea>
      </div>
    </div>

    <h3 class="text-xs mt-10 mb-1">Python code being run:</h3>
    <!-- Filled at runtime with the source of ./encoder.py. -->
    <pre class="js-code text-gray-500 text-xs bg-gray-50 p-4 rounded-xl border border-gray-200/60"></pre>
  </main>

  <script>
    const divInit = document.querySelector(".js-init");
    const btnClear = document.querySelector(".js-clear");
    const btnSubmt = document.querySelector(".js-submt");
    const inputField = document.querySelector(".js-input");
    const outputField = document.querySelector(".js-output");
    const codeView = document.querySelector(".js-code");

    const URL_VOCAB = "https://huggingface.co/gpt2/resolve/main/vocab.json";
    const URL_MERGES = "https://huggingface.co/gpt2/resolve/main/merges.txt";
    const URL_PYODIDE_INDEX = "https://cdn.jsdelivr.net/pyodide/v0.19.1/full/";

    /**
     * Append one line to the on-page initialization log.
     * Uses DOM nodes rather than `innerHTML +=` so the message is always
     * treated as text and the existing content is not re-parsed.
     * @param {string} message
     */
    function logStatus(message) {
      divInit.append(document.createElement("br"), ` ${message}`);
    }

    /**
     * Download a URL and return its body as text.
     * @param {string} url
     * @returns {Promise<string>}
     * @throws {Error} when the server answers with a non-2xx status, so an
     *   error page is never handed to Python as if it were the real file.
     */
    async function fetchText(url) {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`Failed to download ${url}: HTTP ${response.status}`);
      }
      return response.text();
    }

    /**
     * Download the tokenizer data and ./encoder.py, start Pyodide, build the
     * Python `encoder` object, then wire the input field and buttons to it.
     * Any failure is logged to the console and shown in the status line.
     */
    (async function main() {
      try {
        const vocab = await fetchText(URL_VOCAB);
        logStatus(`Downloaded vocab from ${URL_VOCAB}`);

        const merges = await fetchText(URL_MERGES);
        logStatus(`Downloaded merges from ${URL_MERGES}`);

        const py_code = await fetchText("./encoder.py");
        codeView.textContent = py_code;
        logStatus("Downloaded python code from present repo");

        const pyodide = await loadPyodide({ indexURL: URL_PYODIDE_INDEX });
        logStatus("Initialized Pyodide");

        // Load whatever packages encoder.py imports, then define its
        // functions in the Python global namespace.
        await pyodide.loadPackagesFromImports(py_code);
        pyodide.runPython(py_code);
        pyodide.globals.set("vocab", vocab);
        pyodide.globals.set("merges", merges);
        pyodide.runPython("encoder = get_encoder_from_strings(vocab, merges)");
        logStatus("Initialized tokenizer");

        /** Encode the current input and show the result, space-separated. */
        const compute = () => {
          // Hand the text to Python as a value instead of splicing it into
          // Python source, so no string-escaping rules have to line up.
          pyodide.globals.set("_input_text", inputField.value);
          const result = pyodide.runPython("encoder.encode(_input_text)");
          try {
            // `result` is iterated the same way the original code did
            // (original note: "array of ints").
            outputField.value = Array.from(result).join(" ");
          } finally {
            // Proxies to Python objects must be released explicitly;
            // otherwise one is leaked on every keystroke.
            if (result && typeof result.destroy === "function") {
              result.destroy();
            }
          }
        };

        btnSubmt.addEventListener("click", compute);
        inputField.addEventListener("input", compute);
        btnSubmt.click(); // encode the pre-filled sentence right away
        btnClear.addEventListener("click", () => {
          inputField.value = "";
          compute();
        });

        // Put the caret at the end of the pre-filled text.
        inputField.focus();
        inputField.selectionStart = inputField.selectionEnd = inputField.value.length;
      } catch (err) {
        console.error(err);
        divInit.classList.replace("text-green-800", "text-red-800");
        logStatus(`Initialization failed: ${err instanceof Error ? err.message : String(err)}`);
      }
    })();
  </script>
</body>
</html>