# Spaces: Running
# import gradio as gr | |
# import requests | |
# import time | |
# import json | |
# import os | |
# import datetime | |
# from transformers import AutoModelForCausalLM, AutoTokenizer | |
# import torch | |
# ### SET YOUR ASSEMBLYAI API KEY | |
# ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY", "your_assemblyai_api_key") | |
# headers = {"authorization": ASSEMBLYAI_API_KEY} | |
# notes_file = "notes.json" | |
# ### LOAD LLM | |
# model_id = "IlmaJiyadh/phi3-4k-ft" | |
# tokenizer = AutoTokenizer.from_pretrained(model_id) | |
# model = AutoModelForCausalLM.from_pretrained( | |
# model_id, | |
# device_map="auto", | |
# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, | |
# trust_remote_code=True | |
# ) | |
# ### TRANSCRIBE AUDIO WITH ASSEMBLYAI | |
# def transcribe(audio_path): | |
# with open(audio_path, 'rb') as f: | |
# upload_res = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, files={"file": f}) | |
# audio_url = upload_res.json()["upload_url"] | |
# transcript_res = requests.post("https://api.assemblyai.com/v2/transcript", json={"audio_url": audio_url}, headers=headers) | |
# transcript_id = transcript_res.json()["id"] | |
# while True: | |
# poll = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers=headers).json() | |
# if poll['status'] == 'completed': | |
# return poll['text'] | |
# elif poll['status'] == 'error': | |
# return f"Transcription failed: {poll['error']}" | |
# time.sleep(2) | |
# ### SUMMARIZE USING LLM | |
# def summarize(text): | |
# prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n" | |
# inputs = tokenizer(prompt, return_tensors="pt").to(model.device) | |
# outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, use_cache=False) | |
# return tokenizer.decode(outputs[0], skip_special_tokens=True) | |
# ## SAVE TO JSON | |
# def save_note(date, transcript, summary): | |
# data = {"date": date, "transcript": transcript, "summary": summary} | |
# if os.path.exists(notes_file): | |
# with open(notes_file, "r") as f: | |
# all_notes = json.load(f) | |
# else: | |
# all_notes = [] | |
# all_notes.append(data) | |
# with open(notes_file, "w") as f: | |
# json.dump(all_notes, f, indent=2) | |
# ### SEARCH NOTES | |
# def search_notes(query): | |
# if not os.path.exists(notes_file): | |
# return "No notes available yet." | |
# with open(notes_file, "r") as f: | |
# notes = json.load(f) | |
# results = [n for n in notes if query.lower() in n['summary'].lower() or query.lower() in n['transcript'].lower()] | |
# if not results: | |
# return "No matching notes found." | |
# return "\n\n".join([f"π {n['date']}\n{n['summary']}" for n in results]) | |
# ### FULL PIPELINE | |
# def full_pipeline(audio): | |
# if audio is None: | |
# return "No audio provided", "", "" | |
# transcript = transcribe(audio) | |
# summary = summarize(transcript) | |
# date_str = str(datetime.date.today()) | |
# save_note(date_str, transcript, summary) | |
# return transcript, summary, f"β Lecture saved for {date_str}" | |
# ### BUILD GRADIO UI | |
# with gr.Blocks() as demo: | |
# gr.Markdown("# π Lecture Assistant (Audio β Summary + Search)") | |
# with gr.Row(): | |
# with gr.Column(): | |
# #audio_input = gr.Audio(source="microphone", type="filepath", label="ποΈ Record Audio") | |
# audio_input = gr.Audio(type="filepath", label="ποΈ Record Audio") | |
# submit_btn = gr.Button("Transcribe & Summarize") | |
# transcript_output = gr.Textbox(label="π Transcript") | |
# summary_output = gr.Textbox(label="π Summary") | |
# save_status = gr.Textbox(label="πΎ Save Status") | |
# with gr.Column(): | |
# search_query = gr.Textbox(label="π Search Notes") | |
# search_btn = gr.Button("Search") | |
# search_output = gr.Textbox(label="Results") | |
# submit_btn.click(fn=full_pipeline, inputs=audio_input, outputs=[transcript_output, summary_output, save_status]) | |
# search_btn.click(fn=search_notes, inputs=search_query, outputs=search_output) | |
# demo.launch() | |
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Identifier of the fine-tuned Phi-3 checkpoint passed to `from_pretrained`.
model_id = "IlmaJiyadh/phi3-4k-ft"

# The tokenizer and model are loaded once at import time and shared by every
# call to `summarize`.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # Half precision only when CUDA is available; full precision otherwise.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    # NOTE(review): this allows Python code shipped with the checkpoint to be
    # executed locally. Confirm the repository is trusted, or pin a revision.
    trust_remote_code=True,
)
def summarize(text):
    """Generate bullet-point lecture notes for a transcript.

    Args:
        text: Lecture transcript to summarize.

    Returns:
        The text the model generated after the prompt, with special tokens
        and surrounding whitespace removed. Returns "" when ``text`` is
        empty, ``None`` or whitespace-only.
    """
    if not text or not text.strip():
        # Nothing to summarize; skip the expensive generation call.
        return ""

    prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        # `temperature` is only honoured in sampling mode; without
        # `do_sample=True` it is ignored and decoding is greedy.
        do_sample=True,
        temperature=0.7,
        # NOTE(review): kept from the original. Disabling the KV cache slows
        # generation; confirm it is still needed for this checkpoint.
        use_cache=False,
    )

    # A causal LM returns the prompt tokens followed by the continuation.
    # Decode only the continuation so the summary does not echo the prompt
    # and the full transcript.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
# Minimal UI that exercises only the summarization step: one textbox for the
# pasted transcript, one textbox for the output of `summarize`.
# NOTE(review): the label/title strings appear to contain mis-encoded emoji.
# They are left unchanged here; restore them from the original file.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=10, label="π Paste Transcript"),
    outputs=gr.Textbox(label="π Summary"),
    title="π§ Transcript β Summary (Phi-3 Fine-tuned)",
    description="Test only the summarization step.",
)
demo.launch()