import gradio as gr
from transformers import AutoProcessor, Blip2ForConditionalGeneration, BitsAndBytesConfig,Blip2Processor
from gtts import gTTS
from tempfile import NamedTemporaryFile
from PIL import Image
import torch
import os
import torchaudio
import whisper
# Load BLIP-2 image-captioning model (8-bit quantized to reduce GPU memory).
device = "cuda" if torch.cuda.is_available() else "cpu"
quant_config = BitsAndBytesConfig(load_in_8bit=True)
processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
# Bug fix: quant_config was constructed but never passed to from_pretrained,
# so the model silently loaded at full precision. Passing it here makes the
# 8-bit quantization actually take effect (requires bitsandbytes + CUDA).
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-flan-t5-xl",
    device_map="auto",
    quantization_config=quant_config,
)
# Load Whisper speech-to-text model ("small" checkpoint, CPU/GPU auto-selected
# internally by whisper.load_model).
whisper_model = whisper.load_model("small")
def transcribe(audio):
    """Transcribe an audio recording to text with the loaded Whisper model.

    Args:
        audio: Path to the audio file to transcribe (as handed over by the
            Gradio audio component).

    Returns:
        The transcribed text as a string.
    """
    return whisper_model.transcribe(audio)["text"]
from PIL import Image
import torch
from gtts import gTTS
from tempfile import NamedTemporaryFile