File size: 3,001 Bytes
37ddc9f
8981d3d
2d7613a
 
37ddc9f
47e7d4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d7613a
16ff55e
2d7613a
 
 
 
 
47e7d4f
2d7613a
 
 
47e7d4f
 
e5261b7
 
2d7613a
 
 
 
 
 
 
 
 
 
47e7d4f
 
 
 
2d7613a
 
 
 
 
 
 
 
47e7d4f
 
 
e5261b7
47e7d4f
 
 
e5261b7
47e7d4f
 
e5261b7
37ddc9f
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import io
import streamlit as st
import fitz
from PIL import Image
from datasets import Dataset
from streamlit_cropper import st_cropper


def get_padded_image(image, cropped_image):
    """Return ``cropped_image`` centered on a white canvas sized like ``image``.

    Args:
        image: Reference image; its ``size`` and ``mode`` define the canvas.
        cropped_image: Image to place on the canvas. It is converted to
            ``image.mode`` first when the two modes differ.

    Returns:
        A new image with the dimensions and mode of ``image``, filled with
        white, with the crop pasted at its center.
    """
    canvas_w, canvas_h = image.size
    patch_w, patch_h = cropped_image.size

    # Pasting requires matching modes, so convert only when they differ.
    patch = cropped_image
    if patch.mode != image.mode:
        patch = patch.convert(image.mode)

    # Top-left corner that centers the patch; clamped so it is never negative.
    offset = (
        max(0, (canvas_w - patch_w) // 2),
        max(0, (canvas_h - patch_h) // 2),
    )

    canvas = Image.new(image.mode, (canvas_w, canvas_h), color="white")
    canvas.paste(patch, offset)
    return canvas


# Main annotation flow: upload a PDF, page through it, crop a region, and
# store the padded crop together with its caption in the session state.
st.header("Line Art Data Annotation App")
uploaded_pdf = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_pdf:
    # getvalue() returns the complete buffer regardless of the stream's
    # current read position, unlike read(), which only returns what is left.
    data = uploaded_pdf.getvalue()
    doc = fitz.open(stream=data, filetype="pdf")

    # Initialize per-session annotation state on first use.
    if "page_idx" not in st.session_state:
        st.session_state.page_idx = 0
    if "cropped_images" not in st.session_state:
        st.session_state.cropped_images = []
    if "captions" not in st.session_state:
        st.session_state.captions = []

    total_pages = doc.page_count
    if total_pages == 0:
        # Nothing to render; this also avoids a modulo-by-zero below.
        st.warning("The uploaded PDF contains no pages.")
    else:
        # The modulo keeps the stored index inside [0, total_pages).
        page_idx = st.session_state.page_idx % total_pages

        # Navigation buttons (placed above the image). Page numbers shown to
        # the user are zero-based, matching the internal index.
        col_prev, col_caption, col_next = st.columns([1, 8, 1])
        with col_prev:
            if st.button("<"):
                st.session_state.page_idx = (page_idx - 1) % total_pages
                st.rerun()
        with col_caption:
            st.markdown(
                f"<center>Page {page_idx}/{total_pages - 1}</center>",
                unsafe_allow_html=True,
            )
        with col_next:
            # Raw string: the label is a literal backslash followed by ">",
            # written without the invalid "\>" escape sequence.
            # NOTE(review): the backslash presumably escapes ">" for the
            # button label's Markdown rendering; confirm before removing it.
            if st.button(r"\>"):
                st.session_state.page_idx = (page_idx + 1) % total_pages
                st.rerun()

        # Render the current page to an image after the controls.
        page = doc.load_page(page_idx)
        pix = page.get_pixmap(dpi=200)
        image = Image.open(io.BytesIO(pix.tobytes("png")))
        cropped_image = st_cropper(image, realtime_update=True)
        st.image(cropped_image)
        caption = st.text_input("Caption", key="caption")
        if st.button("Save"):
            # Store the crop padded back to the full page size so every
            # saved image has the same dimensions as its source page.
            padded_image = get_padded_image(image, cropped_image)
            st.session_state.cropped_images.append(padded_image)
            st.session_state.captions.append(caption)
            print(f"{len(st.session_state.cropped_images)=}")
            print(f"{st.session_state.cropped_images[-1].size=}")
            print(f"{st.session_state.captions[-1]=}")

# Export flow: push every saved (image, caption) pair to the Hugging Face Hub.
huggingface_dataset_address = st.sidebar.text_input("Hugging Face Dataset Address")
if st.sidebar.button("Save to HuggingFace"):
    # The annotation lists are only created once a PDF has been uploaded, so
    # fall back to empty lists instead of raising AttributeError here.
    saved_images = (
        st.session_state.cropped_images
        if "cropped_images" in st.session_state
        else []
    )
    saved_captions = (
        st.session_state.captions if "captions" in st.session_state else []
    )
    repo_id = huggingface_dataset_address.strip()

    if not repo_id:
        st.error("Enter a Hugging Face dataset address before saving.")
    elif not saved_images:
        st.warning("There are no saved annotations to upload yet.")
    else:
        dataset = Dataset.from_dict(
            {
                "image": saved_images,
                "caption": saved_captions,
            },
        )
        dataset.push_to_hub(repo_id)
        st.success("Dataset saved to Hugging Face")