# task2 / app.py
# thorfine's picture
# Update app.py
# 172761a verified
# raw
# history blame
# 886 Bytes
import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration, BitsAndBytesConfig,Blip2Processor
from gtts import gTTS
from tempfile import NamedTemporaryFile
from PIL import Image
import torch
import os
import torchaudio
import whisper
# --- BLIP-2 (image + text -> text) -----------------------------------------
# One checkpoint id shared by the processor and the model so they stay in sync.
_BLIP2_CHECKPOINT = "Salesforce/blip2-flan-t5-xl"

# Pick the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# NOTE(review): this 8-bit quantization config is constructed here but is not
# passed to any loader call in the visible code -- confirm whether it is used
# further down the file or is a leftover.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

processor = Blip2Processor.from_pretrained(_BLIP2_CHECKPOINT)
model = Blip2ForConditionalGeneration.from_pretrained(
    _BLIP2_CHECKPOINT,
    device_map="auto",
)

# --- Whisper (speech -> text) ----------------------------------------------
# Loads the "small" Whisper checkpoint.
whisper_model = whisper.load_model("small")
def transcribe(audio) -> str:
    """Transcribe speech to text with the module-level Whisper model.

    Args:
        audio: Input forwarded unchanged to ``whisper_model.transcribe``
            (presumably a file path or waveform supplied by the UI --
            verify against the caller). ``None`` means no audio was given.

    Returns:
        The ``"text"`` entry of Whisper's result, or an empty string when
        ``audio`` is ``None``.
    """
    # Nothing was recorded or uploaded: skip the model rather than letting
    # Whisper fail on a None input.
    if audio is None:
        return ""

    # Use Whisper for transcription
    result = whisper_model.transcribe(audio)
    return result["text"]
from PIL import Image
import torch
from gtts import gTTS
from tempfile import NamedTemporaryFile