Spaces:

geekyrakshit
/

line-art-data-annotation

Running

File size: 3,001 Bytes

37ddc9f
8981d3d
2d7613a
 
37ddc9f
47e7d4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d7613a
16ff55e
2d7613a
 
 
 
 
47e7d4f
2d7613a
 
 
47e7d4f
 
e5261b7
 
2d7613a
 
 
 
 
 
 
 
 
 
47e7d4f
 
 
 
2d7613a
 
 
 
 
 
 
 
47e7d4f
 
 
e5261b7
47e7d4f
 
 
e5261b7
47e7d4f
 
e5261b7
37ddc9f

import io
import streamlit as st
import fitz
from PIL import Image
from datasets import Dataset
from streamlit_cropper import st_cropper


def get_padded_image(image, cropped_image):
    """Return ``cropped_image`` centered on a white canvas sized like ``image``.

    Args:
        image: Reference image; its ``size`` and ``mode`` define the canvas.
        cropped_image: Image to place on the canvas. It is converted to
            ``image.mode`` first when the two modes differ.

    Returns:
        A new image with the size and mode of ``image``, filled with white,
        with the crop pasted at its center.
    """
    canvas_size = image.size
    canvas_mode = image.mode

    # Pasting needs matching modes, so convert only when they differ.
    patch = cropped_image
    if patch.mode != canvas_mode:
        patch = patch.convert(canvas_mode)

    canvas = Image.new(canvas_mode, canvas_size, color="white")

    # Per-axis centering offset, clamped at 0 so a crop larger than the
    # canvas is anchored at the top-left corner instead of a negative offset.
    offset = tuple(
        max(0, (full - part) // 2)
        for full, part in zip(canvas_size, cropped_image.size)
    )
    canvas.paste(patch, offset)
    return canvas


# Main annotation flow: upload a PDF, page through it, crop a region, and
# store the padded crop together with its caption in the session state.
st.header("Line Art Data Annotation App")
uploaded_pdf = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_pdf:
    # getvalue() returns the complete buffer regardless of the current read
    # position, whereas read() only returns what is left after the cursor.
    data = uploaded_pdf.getvalue()
    doc = fitz.open(stream=data, filetype="pdf")

    # Create the per-session state on first use: the current page index and
    # the parallel lists of saved images and their captions.
    if "page_idx" not in st.session_state:
        st.session_state.page_idx = 0
    if "cropped_images" not in st.session_state:
        st.session_state.cropped_images = []
    if "captions" not in st.session_state:
        st.session_state.captions = []

    total_pages = doc.page_count
    # The modulo keeps the stored index inside this document's page range.
    page_idx = st.session_state.page_idx % total_pages

    # Navigation buttons (placed above the image); both wrap around.
    col_prev, col_caption, col_next = st.columns([1, 8, 1])
    with col_prev:
        if st.button("<"):
            st.session_state.page_idx = (page_idx - 1) % total_pages
            st.rerun()
    with col_caption:
        # Page numbers are shown zero-based, matching page_idx.
        st.markdown(
            f"<center>Page {page_idx}/{total_pages - 1}</center>",
            unsafe_allow_html=True,
        )
    with col_next:
        # "\\>" is the two-character label backslash + ">" written with a
        # valid escape; the previous "\>" spelling relied on an invalid escape
        # sequence, which newer Python versions warn about.
        # NOTE(review): the backslash presumably stops the label from being
        # parsed as a Markdown blockquote -- confirm.
        if st.button("\\>"):
            st.session_state.page_idx = (page_idx + 1) % total_pages
            st.rerun()

    # Render the current page to a PIL image after the controls.
    page = doc.load_page(page_idx)
    pix = page.get_pixmap(dpi=200)
    image = Image.open(io.BytesIO(pix.tobytes("png")))

    # Interactive crop with a live preview of the selected region.
    cropped_image = st_cropper(image, realtime_update=True)
    st.image(cropped_image)
    caption = st.text_input("Caption", key="caption")

    if st.button("Save"):
        # Store the crop padded back to full page size alongside its caption.
        padded_image = get_padded_image(image, cropped_image)
        st.session_state.cropped_images.append(padded_image)
        st.session_state.captions.append(caption)
        # Debug output on the server console.
        print(f"{len(st.session_state.cropped_images)=}")
        print(f"{st.session_state.cropped_images[-1].size=}")
        print(f"{st.session_state.captions[-1]=}")

# Sidebar export: push every saved (image, caption) pair to the Hugging Face
# Hub as a dataset.
huggingface_dataset_address = st.sidebar.text_input("Hugging Face Dataset Address")
if st.sidebar.button("Save to HuggingFace"):
    # The session keys only exist once a PDF has been uploaded, so fall back
    # to empty lists instead of raising AttributeError on a fresh session.
    saved_images = st.session_state.get("cropped_images", [])
    saved_captions = st.session_state.get("captions", [])
    repo_id = huggingface_dataset_address.strip()

    if not repo_id:
        st.error("Enter a Hugging Face dataset address before saving.")
    elif not saved_images:
        st.warning("Nothing to upload yet: save at least one annotation first.")
    else:
        dataset = Dataset.from_dict(
            {
                "image": saved_images,
                "caption": saved_captions,
            },
        )
        dataset.push_to_hub(repo_id)
        st.success("Dataset saved to Hugging Face")