# app.py — last commit by IlmaJiyadh: "Update app.py" (bcdb714, verified)
# import gradio as gr
# import requests
# import time
# import json
# import os
# import datetime
# from transformers import AutoModelForCausalLM, AutoTokenizer
# import torch
# ### SET YOUR ASSEMBLYAI API KEY
# ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY", "your_assemblyai_api_key")
# headers = {"authorization": ASSEMBLYAI_API_KEY}
# notes_file = "notes.json"
# ### LOAD LLM
# model_id = "IlmaJiyadh/phi3-4k-ft"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(
# model_id,
# device_map="auto",
# torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
# trust_remote_code=True
# )
# ### TRANSCRIBE AUDIO WITH ASSEMBLYAI
# def transcribe(audio_path):
# with open(audio_path, 'rb') as f:
# upload_res = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, files={"file": f})
# audio_url = upload_res.json()["upload_url"]
# transcript_res = requests.post("https://api.assemblyai.com/v2/transcript", json={"audio_url": audio_url}, headers=headers)
# transcript_id = transcript_res.json()["id"]
# while True:
# poll = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers=headers).json()
# if poll['status'] == 'completed':
# return poll['text']
# elif poll['status'] == 'error':
# return f"Transcription failed: {poll['error']}"
# time.sleep(2)
# ### SUMMARIZE USING LLM
# def summarize(text):
# prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n"
# inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
# outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, use_cache=False)
# return tokenizer.decode(outputs[0], skip_special_tokens=True)
# ## SAVE TO JSON
# def save_note(date, transcript, summary):
# data = {"date": date, "transcript": transcript, "summary": summary}
# if os.path.exists(notes_file):
# with open(notes_file, "r") as f:
# all_notes = json.load(f)
# else:
# all_notes = []
# all_notes.append(data)
# with open(notes_file, "w") as f:
# json.dump(all_notes, f, indent=2)
# ### SEARCH NOTES
# def search_notes(query):
# if not os.path.exists(notes_file):
# return "No notes available yet."
# with open(notes_file, "r") as f:
# notes = json.load(f)
# results = [n for n in notes if query.lower() in n['summary'].lower() or query.lower() in n['transcript'].lower()]
# if not results:
# return "No matching notes found."
# return "\n\n".join([f"📅 {n['date']}\n{n['summary']}" for n in results])
# ### FULL PIPELINE
# def full_pipeline(audio):
# if audio is None:
# return "No audio provided", "", ""
# transcript = transcribe(audio)
# summary = summarize(transcript)
# date_str = str(datetime.date.today())
# save_note(date_str, transcript, summary)
# return transcript, summary, f"✅ Lecture saved for {date_str}"
# ### BUILD GRADIO UI
# with gr.Blocks() as demo:
# gr.Markdown("# 🎓 Lecture Assistant (Audio → Summary + Search)")
# with gr.Row():
# with gr.Column():
# #audio_input = gr.Audio(source="microphone", type="filepath", label="🎙️ Record Audio")
# audio_input = gr.Audio(type="filepath", label="🎙️ Record Audio")
# submit_btn = gr.Button("Transcribe & Summarize")
# transcript_output = gr.Textbox(label="📄 Transcript")
# summary_output = gr.Textbox(label="📝 Summary")
# save_status = gr.Textbox(label="💾 Save Status")
# with gr.Column():
# search_query = gr.Textbox(label="🔍 Search Notes")
# search_btn = gr.Button("Search")
# search_output = gr.Textbox(label="Results")
# submit_btn.click(fn=full_pipeline, inputs=audio_input, outputs=[transcript_output, summary_output, save_status])
# search_btn.click(fn=search_notes, inputs=search_query, outputs=search_output)
# demo.launch()
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Checkpoint identifier shared by the tokenizer and the model.
model_id = "IlmaJiyadh/phi3-4k-ft"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

# trust_remote_code=True allows the checkpoint's own modelling code to run;
# device placement is delegated to the library via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=_weights_dtype,
    trust_remote_code=True,
)
def summarize(text):
    """Generate bullet-point lecture notes for a transcript.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: The lecture transcript. ``None``, empty, or whitespace-only
            input is treated as "nothing to summarize".

    Returns:
        Only the newly generated notes (the prompt and transcript are not
        echoed back), stripped of surrounding whitespace. An empty string
        when there is no transcript to summarize.
    """
    if not text or not text.strip():
        # Nothing to summarize: skip the expensive generation call rather
        # than letting the model invent notes for an empty transcript.
        return ""

    prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # temperature only takes effect when sampling is enabled, so do_sample
    # is set explicitly; without it the 0.7 setting is silently ignored.
    # NOTE(review): use_cache=False is preserved from the original call;
    # presumably a workaround for the remote-code model — confirm before
    # enabling the KV cache.
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        use_cache=False,
    )

    # generate() returns prompt tokens followed by the continuation for a
    # decoder-only model; drop the prompt so only the summary is returned.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Minimal UI for exercising the summarization step on its own: one
# multi-line textbox in, one textbox out, wired to summarize().
# The labels and title previously contained mis-decoded (mojibake) emoji;
# they are restored to the intended characters here.
demo = gr.Interface(
    fn=summarize,
    inputs=gr.Textbox(lines=10, label="📄 Paste Transcript"),
    outputs=gr.Textbox(label="📝 Summary"),
    title="🧠 Transcript → Summary (Phi-3 Fine-tuned)",
    description="Test only the summarization step.",
)
demo.launch()