IlmaJiyadh committed
Commit bcdb714 · verified · Parent: 7787d4b

Update app.py

Files changed (1): app.py  +113 -85
app.py CHANGED
@@ -1,18 +1,111 @@
+# import gradio as gr
+# import requests
+# import time
+# import json
+# import os
+# import datetime
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# import torch
+
+# ### SET YOUR ASSEMBLYAI API KEY
+# ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY", "your_assemblyai_api_key")
+# headers = {"authorization": ASSEMBLYAI_API_KEY}
+# notes_file = "notes.json"
+
+# ### LOAD LLM
+# model_id = "IlmaJiyadh/phi3-4k-ft"
+# tokenizer = AutoTokenizer.from_pretrained(model_id)
+# model = AutoModelForCausalLM.from_pretrained(
+#     model_id,
+#     device_map="auto",
+#     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+#     trust_remote_code=True
+# )
+
+# ### TRANSCRIBE AUDIO WITH ASSEMBLYAI
+# def transcribe(audio_path):
+#     with open(audio_path, 'rb') as f:
+#         upload_res = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, files={"file": f})
+#     audio_url = upload_res.json()["upload_url"]
+
+#     transcript_res = requests.post("https://api.assemblyai.com/v2/transcript", json={"audio_url": audio_url}, headers=headers)
+#     transcript_id = transcript_res.json()["id"]
+
+#     while True:
+#         poll = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers=headers).json()
+#         if poll['status'] == 'completed':
+#             return poll['text']
+#         elif poll['status'] == 'error':
+#             return f"Transcription failed: {poll['error']}"
+#         time.sleep(2)
+
+# ### SUMMARIZE USING LLM
+# def summarize(text):
+#     prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n"
+#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+#     outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, use_cache=False)
+#     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+# ### SAVE TO JSON
+# def save_note(date, transcript, summary):
+#     data = {"date": date, "transcript": transcript, "summary": summary}
+#     if os.path.exists(notes_file):
+#         with open(notes_file, "r") as f:
+#             all_notes = json.load(f)
+#     else:
+#         all_notes = []
+#     all_notes.append(data)
+#     with open(notes_file, "w") as f:
+#         json.dump(all_notes, f, indent=2)
+
+# ### SEARCH NOTES
+# def search_notes(query):
+#     if not os.path.exists(notes_file):
+#         return "No notes available yet."
+#     with open(notes_file, "r") as f:
+#         notes = json.load(f)
+#     results = [n for n in notes if query.lower() in n['summary'].lower() or query.lower() in n['transcript'].lower()]
+#     if not results:
+#         return "No matching notes found."
+#     return "\n\n".join([f"📅 {n['date']}\n{n['summary']}" for n in results])
+
+# ### FULL PIPELINE
+# def full_pipeline(audio):
+#     if audio is None:
+#         return "No audio provided", "", ""
+
+#     transcript = transcribe(audio)
+#     summary = summarize(transcript)
+#     date_str = str(datetime.date.today())
+#     save_note(date_str, transcript, summary)
+#     return transcript, summary, f"✅ Lecture saved for {date_str}"
+
+# ### BUILD GRADIO UI
+# with gr.Blocks() as demo:
+#     gr.Markdown("# 🎓 Lecture Assistant (Audio → Summary + Search)")
+#     with gr.Row():
+#         with gr.Column():
+#             #audio_input = gr.Audio(source="microphone", type="filepath", label="🎙️ Record Audio")
+#             audio_input = gr.Audio(type="filepath", label="🎙️ Record Audio")
+#             submit_btn = gr.Button("Transcribe & Summarize")
+#             transcript_output = gr.Textbox(label="📄 Transcript")
+#             summary_output = gr.Textbox(label="📝 Summary")
+#             save_status = gr.Textbox(label="💾 Save Status")
+
+#         with gr.Column():
+#             search_query = gr.Textbox(label="🔍 Search Notes")
+#             search_btn = gr.Button("Search")
+#             search_output = gr.Textbox(label="Results")
+
+#     submit_btn.click(fn=full_pipeline, inputs=audio_input, outputs=[transcript_output, summary_output, save_status])
+#     search_btn.click(fn=search_notes, inputs=search_query, outputs=search_output)
+
+# demo.launch()
+
 import gradio as gr
-import requests
-import time
-import json
-import os
-import datetime
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-### SET YOUR ASSEMBLYAI API KEY
-ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY", "your_assemblyai_api_key")
-headers = {"authorization": ASSEMBLYAI_API_KEY}
-notes_file = "notes.json"
-
-### LOAD LLM
 model_id = "IlmaJiyadh/phi3-4k-ft"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
@@ -22,82 +115,17 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True
 )
 
-### TRANSCRIBE AUDIO WITH ASSEMBLYAI
-def transcribe(audio_path):
-    with open(audio_path, 'rb') as f:
-        upload_res = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, files={"file": f})
-    audio_url = upload_res.json()["upload_url"]
-
-    transcript_res = requests.post("https://api.assemblyai.com/v2/transcript", json={"audio_url": audio_url}, headers=headers)
-    transcript_id = transcript_res.json()["id"]
-
-    while True:
-        poll = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers=headers).json()
-        if poll['status'] == 'completed':
-            return poll['text']
-        elif poll['status'] == 'error':
-            return f"Transcription failed: {poll['error']}"
-        time.sleep(2)
-
-### SUMMARIZE USING LLM
 def summarize(text):
     prompt = f"Below is a lecture transcript. Take lecture notes in bullet points.\n\nInput:\n{text}\n\nSummary:\n"
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, use_cache=False)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-### SAVE TO JSON
-def save_note(date, transcript, summary):
-    data = {"date": date, "transcript": transcript, "summary": summary}
-    if os.path.exists(notes_file):
-        with open(notes_file, "r") as f:
-            all_notes = json.load(f)
-    else:
-        all_notes = []
-    all_notes.append(data)
-    with open(notes_file, "w") as f:
-        json.dump(all_notes, f, indent=2)
-
-### SEARCH NOTES
-def search_notes(query):
-    if not os.path.exists(notes_file):
-        return "No notes available yet."
-    with open(notes_file, "r") as f:
-        notes = json.load(f)
-    results = [n for n in notes if query.lower() in n['summary'].lower() or query.lower() in n['transcript'].lower()]
-    if not results:
-        return "No matching notes found."
-    return "\n\n".join([f"📅 {n['date']}\n{n['summary']}" for n in results])
-
-### FULL PIPELINE
-def full_pipeline(audio):
-    if audio is None:
-        return "No audio provided", "", ""
-
-    transcript = transcribe(audio)
-    summary = summarize(transcript)
-    date_str = str(datetime.date.today())
-    save_note(date_str, transcript, summary)
-    return transcript, summary, f"✅ Lecture saved for {date_str}"
-
-### BUILD GRADIO UI
-with gr.Blocks() as demo:
-    gr.Markdown("# 🎓 Lecture Assistant (Audio → Summary + Search)")
-    with gr.Row():
-        with gr.Column():
-            #audio_input = gr.Audio(source="microphone", type="filepath", label="🎙️ Record Audio")
-            audio_input = gr.Audio(type="filepath", label="🎙️ Record Audio")
-            submit_btn = gr.Button("Transcribe & Summarize")
-            transcript_output = gr.Textbox(label="📄 Transcript")
-            summary_output = gr.Textbox(label="📝 Summary")
-            save_status = gr.Textbox(label="💾 Save Status")
-
-        with gr.Column():
-            search_query = gr.Textbox(label="🔍 Search Notes")
-            search_btn = gr.Button("Search")
-            search_output = gr.Textbox(label="Results")
-
-    submit_btn.click(fn=full_pipeline, inputs=audio_input, outputs=[transcript_output, summary_output, save_status])
-    search_btn.click(fn=search_notes, inputs=search_query, outputs=search_output)
-
-demo.launch()
+gr.Interface(
+    fn=summarize,
+    inputs=gr.Textbox(lines=10, label="📄 Paste Transcript"),
+    outputs=gr.Textbox(label="📝 Summary"),
+    title="🧠 Transcript → Summary (Phi-3 Fine-tuned)",
+    description="Test only the summarization step."
+).launch()
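
As a quick sanity check of the reduced app, the new summarize-only Interface can be exercised programmatically with gradio_client once app.py is running. This is a minimal sketch, not part of the commit: the localhost URL assumes gradio's default port, and "/predict" is the default endpoint name that a single-function gr.Interface exposes.

# sanity_check.py — hedged sketch; assumes `python app.py` is already
# serving locally on gradio's default port (7860).
from gradio_client import Client

client = Client("http://127.0.0.1:7860")

# Send a short sample transcript through the same path the Textbox uses;
# "/predict" is gr.Interface's default api_name for its single function.
summary = client.predict(
    "Today we covered gradient descent: the update rule, how the learning "
    "rate affects convergence, and why convex losses have one global minimum.",
    api_name="/predict",
)
print(summary)

Going through the HTTP client also sidesteps a quirk of the new file: app.py calls .launch() at import time, so importing summarize directly from another script would block.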