File size: 2,990 Bytes
7b4f310
 
 
69db8f3
7b4f310
606bdc0
69db8f3
7b4f310
69db8f3
 
606bdc0
69db8f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Nikhil Kunjoor
"""
import gradio as gr
from transformers import pipeline
from PIL import Image, ImageFilter
import numpy as np
import torch

# Load models from Hugging Face
# Semantic segmentation (Cityscapes label set, which includes 'person') —
# used to build the foreground/subject mask in process_image.
segmentation_model = pipeline("image-segmentation", model="nvidia/segformer-b1-finetuned-cityscapes-1024-1024")
# Monocular depth estimation — drives the depth-proportional lens blur.
# NOTE(review): both pipelines download weights on first run and default to CPU
# unless a device is specified — confirm deployment environment.
depth_estimator = pipeline("depth-estimation", model="Intel/dpt-large")

def apply_gaussian_blur(image, mask, sigma):
    """Blur the whole image, then restore the masked region to full sharpness.

    Where *mask* is white (255) the original pixels are kept; where it is
    black (0) the Gaussian-blurred pixels show through — i.e. the mask marks
    the subject to keep in focus.
    """
    background = image.filter(ImageFilter.GaussianBlur(sigma))
    # Image.composite picks from the first image where the mask is opaque.
    return Image.composite(image, background, mask)

def apply_lens_blur(image, depth_map, sigma):
    """Apply a depth-dependent (lens-style) blur to *image*.

    Pixels with larger normalized depth receive a blur radius approaching
    *sigma*; pixels at minimum depth stay sharp.

    Fixes two defects in the original implementation:
    - division by zero when the depth map is constant (max == min);
    - a per-pixel crop(3x3)+GaussianBlur loop that was O(W*H) full filter
      calls AND incapable of expressing a large radius inside a 3x3 window.

    Instead we precompute a small stack of progressively blurred copies of
    the whole image and select, per pixel, the level matching its depth.
    """
    depth_array = np.asarray(depth_map, dtype=np.float32)
    depth_min = depth_array.min()
    depth_range = depth_array.max() - depth_min
    if depth_range == 0:
        # Flat depth map: no depth variation, so nothing to blur differentially.
        normalized_depth = np.zeros_like(depth_array)
    else:
        normalized_depth = (depth_array - depth_min) / depth_range

    # A handful of blur levels is visually indistinguishable from a continuous
    # radius and costs only `num_levels` full-image filters.
    num_levels = 8
    blur_stack = np.stack([
        np.asarray(image.filter(ImageFilter.GaussianBlur(sigma * level / (num_levels - 1))))
        for level in range(num_levels)
    ])  # shape: (levels, H, W, C)

    level_index = np.clip(
        np.rint(normalized_depth * (num_levels - 1)).astype(np.intp),
        0, num_levels - 1,
    )
    rows, cols = np.indices(level_index.shape)
    # Per-pixel gather: pick each pixel from the blur level its depth demands.
    output = blur_stack[level_index, rows, cols]
    return Image.fromarray(output)

def process_image(image, blur_type, sigma):
    """Run segmentation + depth estimation and apply the chosen blur.

    Args:
        image: input PIL image.
        blur_type: "Gaussian Blur" (mask-based) or "Lens Blur" (depth-based).
        sigma: blur intensity (max Gaussian radius).

    Returns:
        (person_mask, depth_visualization, output_image) — all PIL images.
    """
    # Perform segmentation and pull out the 'person' mask, if any.
    segmentation_results = segmentation_model(image)
    person_mask = None
    for segment in segmentation_results:
        if segment['label'] == 'person':
            # BUG FIX: the pipeline returns segment['mask'] as a PIL 'L'
            # image; the original `segment['mask'] * 255` raised TypeError.
            # Convert to an array first, and only rescale if it is 0/1.
            mask_array = np.array(segment['mask'])
            if mask_array.max() <= 1:
                mask_array = mask_array * 255
            person_mask = Image.fromarray(mask_array.astype(np.uint8))
            break

    if person_mask is None:
        person_mask = Image.new('L', image.size, 255)  # Create a white mask if no person is detected

    # Perform depth estimation
    depth_results = depth_estimator(image)
    depth_map = depth_results["depth"]

    # Normalize depth map for visualization (guard against a constant map,
    # which would otherwise divide by zero).
    depth_array = np.array(depth_map, dtype=np.float32)
    depth_range = np.max(depth_array) - np.min(depth_array)
    if depth_range == 0:
        normalized_depth = np.zeros_like(depth_array)
    else:
        normalized_depth = (depth_array - np.min(depth_array)) / depth_range * 255
    depth_visualization = Image.fromarray(normalized_depth.astype(np.uint8))

    # Apply selected blur effect
    if blur_type == "Gaussian Blur":
        output_image = apply_gaussian_blur(image, person_mask, sigma)
    else:  # Lens Blur
        output_image = apply_lens_blur(image, depth_map, sigma)

    return person_mask, depth_visualization, output_image

# Create Gradio interface
# Three outputs mirror the tuple returned by process_image:
# segmentation mask, normalized depth map, and the blurred result.
iface = gr.Interface(
    fn=process_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Radio(["Gaussian Blur", "Lens Blur"], label="Blur Type", value="Gaussian Blur"),
        # Sigma is the maximum Gaussian radius; 0 disables blurring entirely.
        gr.Slider(0, 50, step=1, label="Blur Intensity (Sigma)", value=15)
    ],
    outputs=[
        gr.Image(type="pil", label="Segmentation Mask"),
        gr.Image(type="pil", label="Depth Map"),
        gr.Image(type="pil", label="Output Image")
    ],
    title="Vision Transformer Segmentation & Depth-Based Blur Effects",
    description="Upload an image to apply segmentation and lens blur effects. Adjust the blur type and intensity using the controls below."
)

# NOTE(review): launched unconditionally at import time; consider guarding
# with `if __name__ == "__main__":` if this module is ever imported elsewhere.
iface.launch()