# import gradio as gr # import requests # import time # import json # import os # import datetime # from transformers import AutoModelForCausalLM, AutoTokenizer # import torch # ### SET YOUR ASSEMBLYAI API KEY # ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY", "your_assemblyai_api_key") # headers = {"authorization": ASSEMBLYAI_API_KEY} # notes_file = "notes.json" # ### LOAD LLM # model_id = "IlmaJiyadh/phi3-4k-ft" # tokenizer = AutoTokenizer.from_pretrained(model_id) # model = AutoModelForCausalLM.from_pretrained( # model_id, # device_map="auto", # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, # trust_remote_code=True # ) # ### TRANSCRIBE AUDIO WITH ASSEMBLYAI # def transcribe(audio_path): # with open(audio_path, 'rb') as f: # upload_res = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, files={"file": f}) # audio_url = upload_res.json()["upload_url"] # transcript_res = requests.post("https://api.assemblyai.com/v2/transcript", json={"audio_url": audio_url}, headers=headers) # transcript_id = transcript_res.json()["id"] # while True: # poll = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers=headers).json() # if poll['status'] == 'completed': # return poll['text'] # elif poll['status'] == 'error': # return f"Transcription failed: {poll['error']}" # time.sleep(2) # ### SUMMARIZE USING LLM # def summarize(text): # prompt = f"Below is a lecture transcript. 
# Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n" # inputs = tokenizer(prompt, return_tensors="pt").to(model.device) # outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, use_cache=False) # return tokenizer.decode(outputs[0], skip_special_tokens=True) # ## SAVE TO JSON # def save_note(date, transcript, summary): # data = {"date": date, "transcript": transcript, "summary": summary} # if os.path.exists(notes_file): # with open(notes_file, "r") as f: # all_notes = json.load(f) # else: # all_notes = [] # all_notes.append(data) # with open(notes_file, "w") as f: # json.dump(all_notes, f, indent=2) # ### SEARCH NOTES # def search_notes(query): # if not os.path.exists(notes_file): # return "No notes available yet." # with open(notes_file, "r") as f: # notes = json.load(f) # results = [n for n in notes if query.lower() in n['summary'].lower() or query.lower() in n['transcript'].lower()] # if not results: # return "No matching notes found." # return "\n\n".join([f"📅 {n['date']}\n{n['summary']}" for n in results]) # ### FULL PIPELINE # def full_pipeline(audio): # if audio is None: # return "No audio provided", "", "" # transcript = transcribe(audio) # summary = summarize(transcript) # date_str = str(datetime.date.today()) # save_note(date_str, transcript, summary) # return transcript, summary, f"✅ Lecture saved for {date_str}" # ### BUILD GRADIO UI # with gr.Blocks() as demo: # gr.Markdown("# 🎓 Lecture Assistant (Audio → Summary + Search)") # with gr.Row(): # with gr.Column(): # #audio_input = gr.Audio(source="microphone", type="filepath", label="🎙️ Record Audio") # audio_input = gr.Audio(type="filepath", label="🎙️ Record Audio") # submit_btn = gr.Button("Transcribe & Summarize") # transcript_output = gr.Textbox(label="📄 Transcript") # summary_output = gr.Textbox(label="📝 Summary") # save_status = gr.Textbox(label="💾 Save Status") # with gr.Column(): # search_query = gr.Textbox(label="🔍 Search Notes") # search_btn = gr.Button("Search") 
# search_output = gr.Textbox(label="Results")
# submit_btn.click(fn=full_pipeline, inputs=audio_input, outputs=[transcript_output, summary_output, save_status])
# search_btn.click(fn=search_notes, inputs=search_query, outputs=search_output)
# demo.launch()

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub id of the checkpoint used for summarization.
model_id = "IlmaJiyadh/phi3-4k-ft"

# Tokenizer and model are loaded once at import time and shared by every
# request served by the UI below.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    # Half precision only when a CUDA device is available; float32 otherwise.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    trust_remote_code=True,
)


def summarize(text):
    """Generate bullet-point lecture notes for a transcript.

    Args:
        text: The lecture transcript to summarize.

    Returns:
        The text generated by the model after the prompt, with special
        tokens removed and surrounding whitespace stripped. An empty string
        is returned when ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the (expensive) generation call entirely.
    if not text or not text.strip():
        return ""

    prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Inference only, so no autograd bookkeeping is needed.
    with torch.inference_mode():
        # NOTE(review): `temperature` only influences decoding when sampling
        # is enabled (`do_sample=True`, here or in the model's generation
        # config) -- confirm whether sampling was intended.
        # NOTE(review): `use_cache=False` is kept from the original,
        # presumably as a compatibility workaround for the remote model
        # code -- confirm before re-enabling the KV cache.
        outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, use_cache=False)

    # For a causal LM, `generate` returns the prompt tokens followed by the
    # newly generated ones. Decode only the new tokens so the summary does
    # not echo the instruction and the whole transcript back to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


# Minimal UI that exercises only the summarization step.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=10, label="📄 Paste Transcript"),
    outputs=gr.Textbox(label="📝 Summary"),
    title="🧠 Transcript → Summary (Phi-3 Fine-tuned)",
    description="Test only the summarization step.",
)
demo.launch()