# Spaces: Sleeping  (page-status text captured along with this file; not part of the program)
import base64
import tempfile
import uuid
from io import BytesIO
from pathlib import Path
from typing import Optional, Tuple, Union

import pytesseract
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image, ImageDraw, ImageFont, ImageEnhance
from transformers import BlipProcessor, BlipForConditionalGeneration
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# BLIP captioning processor and model: loaded once at import time and shared
# by every caption_image() call.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = model.to(device)

# Stable Diffusion pipeline, also loaded once at import time. On CUDA the
# "fp16" revision is requested with float16 tensors; on CPU the default
# revision is loaded with float32 tensors.
if device == "cuda":
    pipe = StableDiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4",
        revision="fp16",
        torch_dtype=torch.float16,
    )
else:
    pipe = StableDiffusionPipeline.from_pretrained(
        "CompVis/stable-diffusion-v1-4",
        revision=None,
        torch_dtype=torch.float32,
    )
pipe.to(device)
def extract_text_from_image(path: Union[str, Path]) -> str:
    """Extract text from the image file at `path` using OCR (Tesseract).

    Args:
        path: Location of the image file to read.

    Returns:
        The recognised text with leading and trailing whitespace removed.
        This function does not raise: if opening the image or running OCR
        fails, it returns a string of the form
        ``"[Error extracting text: <reason>]"`` instead.
    """
    try:
        # The context manager closes the underlying file handle as soon as
        # OCR has finished, including when OCR itself raises.
        with Image.open(path) as img:
            text = pytesseract.image_to_string(img)
        return text.strip()
    except Exception as e:
        # Failures are reported through the return value, not as exceptions.
        return f"[Error extracting text: {e}]"
def caption_image(path: Union[str, Path]) -> str:
    """Generate a descriptive caption for the image at `path` using BLIP.

    Args:
        path: Location of the image file to caption.

    Returns:
        The decoded caption (special tokens skipped). This function does not
        raise: on any failure it returns a string of the form
        ``"[Error generating caption: <reason>]"`` instead.
    """
    try:
        # convert() yields an independent RGB image, so the source file can
        # be closed before the model runs.
        with Image.open(path) as source:
            img = source.convert("RGB")
        inputs = processor(img, return_tensors="pt").to(device)
        outputs = model.generate(**inputs)
        # Only the first generated sequence is decoded.
        return processor.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Failures are reported through the return value, not as exceptions.
        return f"[Error generating caption: {e}]"
def generate_image(prompt: str, save_path: Optional[Union[str, Path]] = None) -> Path:
    """Generate an image from a text prompt using Stable Diffusion.

    Args:
        prompt: Text description of the image to generate.
        save_path: Where to write the image. When ``None``, a uniquely named
            ``image_<uuid>.png`` is created in the system temporary directory.

    Returns:
        Path to the saved image.

    Raises:
        ValueError: If `prompt` is ``None``, empty, or only whitespace.
    """
    # Validate before invoking the (expensive) pipeline. `not prompt` also
    # covers None, which would otherwise fail with AttributeError on .strip().
    if not prompt or not prompt.strip():
        raise ValueError("Prompt must not be empty")

    result = pipe(prompt)
    image = result.images[0]  # only the first generated image is kept

    if save_path is None:
        # Use the platform's temp directory instead of a hard-coded "/tmp".
        save_path = Path(tempfile.gettempdir()) / f"image_{uuid.uuid4()}.png"
    else:
        save_path = Path(save_path)
        # Create missing parent directories so the save below cannot fail
        # just because the target folder does not exist yet.
        save_path.parent.mkdir(parents=True, exist_ok=True)

    image.save(save_path)
    return save_path
def _measure_text(font, text: str) -> Tuple[float, float]:
    """Return ``(width, height)`` of `text` rendered with `font`.

    Prefers ``font.getbbox`` and falls back to the legacy ``font.getsize``
    when the font object has no ``getbbox``. ``getsize`` and
    ``ImageDraw.textsize`` were removed in Pillow 10, so relying on them
    alone breaks on current Pillow releases.
    """
    if hasattr(font, "getbbox"):
        _left, _top, right, bottom = font.getbbox(text)
        # Measured from the drawing origin, like the old getsize() result.
        return right, bottom
    return font.getsize(text)


def generate_placeholder_image(
    prompt: str,
    size: Tuple[int, int] = (512, 512),
    bg_color: Tuple[int, int, int] = (173, 216, 230),
    font_path: Optional[Union[str, Path]] = None,
    font_size: int = 18,
) -> str:
    """Create a placeholder image with the prompt text overlaid.

    The prompt is word-wrapped to the image width (minus a 10 px margin on
    each side) and drawn in black, top to bottom, on a solid background.

    Args:
        prompt: Text to draw on the image.
        size: ``(width, height)`` of the image in pixels.
        bg_color: RGB background colour.
        font_path: Optional TrueType font file. If it is omitted or cannot
            be loaded, Pillow's default font is used.
        font_size: Point size used when `font_path` is loaded.

    Returns:
        The PNG image encoded as a base64 string.
    """
    img = Image.new("RGB", size, color=bg_color)
    draw = ImageDraw.Draw(img)

    try:
        if font_path:
            font = ImageFont.truetype(str(font_path), font_size)
        else:
            font = ImageFont.load_default()
    except Exception:
        # Deliberate best-effort: an unusable font file must not prevent the
        # placeholder from being produced.
        font = ImageFont.load_default()

    margin = 10
    max_width = size[0] - 2 * margin

    # Greedy word-wrap: keep adding words while the line still fits.
    lines = []
    line = ""
    for word in prompt.split():
        candidate = f"{line} {word}".strip()
        if _measure_text(font, candidate)[0] <= max_width:
            line = candidate
        else:
            # Skip the empty accumulator; otherwise a first word wider than
            # the image would produce a blank leading line.
            if line:
                lines.append(line)
            line = word
    if line:
        lines.append(line)

    y_text = margin
    for line in lines:
        draw.text((margin, y_text), line, fill="black", font=font)
        y_text += _measure_text(font, line)[1] + 4  # 4 px gap between lines

    buffer = BytesIO()
    img.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
def generate_image_base64(prompt: str) -> str:
    """Generate an image for `prompt` and return it as a base64 PNG string.

    The image is produced by generate_image(), which writes it to a
    temporary file. That file is read back, encoded, and then removed, so
    repeated calls do not pile up PNGs in the temp directory.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The PNG file contents encoded as a base64 string.

    Raises:
        ValueError: Propagated from generate_image() for an empty prompt.
    """
    image_path = Path(generate_image(prompt))
    try:
        return base64.b64encode(image_path.read_bytes()).decode("utf-8")
    finally:
        # Best-effort cleanup: the path is never exposed to the caller, so
        # the file is unreachable afterwards. A failed delete must not mask
        # the result or an error raised while reading.
        try:
            image_path.unlink()
        except OSError:
            pass
def overlay_text_on_image(
    image_path: Union[str, Path],
    text: str,
    position: Tuple[int, int] = (10, 10),
    font_path: Optional[Union[str, Path]] = None,
    font_size: int = 20,
    color: Tuple[int, int, int] = (255, 255, 255),
    outline_color: Tuple[int, int, int] = (0, 0, 0),
    outline_width: int = 2,
) -> Image.Image:
    """Overlay `text` on the image at `image_path`.

    The text is drawn on a transparent layer, optionally surrounded by an
    outline, and the layer is composited onto the image.

    Args:
        image_path: Location of the image file to draw on.
        text: Text to draw.
        position: ``(x, y)`` pixel coordinates of the text origin.
        font_path: Optional TrueType font file. If it is omitted or cannot
            be loaded, Pillow's default font is used.
        font_size: Point size used when `font_path` is loaded.
        color: RGB colour of the text.
        outline_color: RGB colour of the outline.
        outline_width: Outline thickness in pixels; ``0`` or less disables it.

    Returns:
        A new RGB PIL image with the text applied.
    """
    # convert() returns an independent image, so the source file can be
    # closed right away instead of staying open until garbage collection.
    with Image.open(image_path) as source:
        img = source.convert("RGBA")

    txt_layer = Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(txt_layer)

    try:
        if font_path:
            font = ImageFont.truetype(str(font_path), font_size)
        else:
            font = ImageFont.load_default()
    except Exception:
        # Deliberate best-effort: fall back to the default font rather than
        # failing the whole overlay because of a bad font file.
        font = ImageFont.load_default()

    x, y = position

    if outline_width > 0:
        outline_fill = outline_color + (255,)
        # Stamp the text at every offset in the square around the origin.
        # Covering both diagonals and the in-between offsets keeps the
        # outline the same thickness on all sides of each glyph.
        for dx in range(-outline_width, outline_width + 1):
            for dy in range(-outline_width, outline_width + 1):
                if dx == 0 and dy == 0:
                    continue  # the centre is the text itself, drawn below
                draw.text((x + dx, y + dy), text, font=font, fill=outline_fill)

    # Main text goes on top of the outline stamps.
    draw.text(position, text, font=font, fill=color + (255,))

    combined = Image.alpha_composite(img, txt_layer)
    return combined.convert("RGB")
def save_overlayed_image(
    image_path: Union[str, Path],
    text: str,
    output_path: Union[str, Path],
    **overlay_kwargs
) -> Path:
    """Draw `text` onto the image at `image_path` and write it to `output_path`.

    Any additional keyword arguments are forwarded unchanged to
    overlay_text_on_image().

    Returns:
        `output_path` as a ``Path`` object.
    """
    rendered = overlay_text_on_image(image_path, text, **overlay_kwargs)
    destination = Path(output_path)
    rendered.save(destination)
    return destination
def enhance_image_contrast(image_path: Union[str, Path], factor: float = 1.5) -> Image.Image:
    """Enhance the contrast of the image at `image_path`.

    Args:
        image_path: Location of the image file to read.
        factor: Contrast factor passed to ``ImageEnhance.Contrast.enhance``.

    Returns:
        The contrast-adjusted PIL image.
    """
    # Enhance while the source is open, then let the context manager close
    # the file handle instead of leaving it to garbage collection.
    with Image.open(image_path) as img:
        return ImageEnhance.Contrast(img).enhance(factor)
def save_enhanced_image(image_path: Union[str, Path], output_path: Union[str, Path], factor: float = 1.5) -> Path:
    """Write a contrast-enhanced copy of the image at `image_path` to `output_path`.

    The enhancement itself is delegated to enhance_image_contrast() with the
    given `factor`.

    Returns:
        `output_path` as a ``Path`` object.
    """
    result = enhance_image_contrast(image_path, factor)
    destination = Path(output_path)
    result.save(destination)
    return destination