# line-art-data-annotation / src/streamlit_app.py
# geekyrakshit's picture
# update: annotation app
# 37ddc9f
import io
import streamlit as st
import fitz
from PIL import Image
from datasets import Dataset
from streamlit_cropper import st_cropper
def get_padded_image(image, cropped_image):
    """Return ``cropped_image`` centered on a white canvas sized like ``image``.

    Args:
        image: The full page image; its ``size`` and ``mode`` define the canvas.
        cropped_image: The crop to place on the canvas. It is converted to the
            mode of ``image`` when the two modes differ.

    Returns:
        A new image with the dimensions and mode of ``image``, filled with
        white, with the crop pasted in the middle.
    """
    canvas_w, canvas_h = image.size
    patch_w, patch_h = cropped_image.size

    # Only convert when necessary so both images share one mode before pasting.
    patch = cropped_image
    if patch.mode != image.mode:
        patch = patch.convert(image.mode)

    # Centering offsets, clamped so they never go negative.
    offset = (
        max(0, (canvas_w - patch_w) // 2),
        max(0, (canvas_h - patch_h) // 2),
    )

    canvas = Image.new(image.mode, (canvas_w, canvas_h), color="white")
    canvas.paste(patch, offset)
    return canvas
# ---------------------------------------------------------------------------
# Streamlit page: upload a PDF, browse its pages, crop a region, caption it,
# and push the collected (image, caption) pairs to the Hugging Face Hub.
# ---------------------------------------------------------------------------
st.header("Line Art Data Annotation App")
uploaded_pdf = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_pdf:
    # getvalue() returns the whole buffer regardless of the current stream
    # position, so the document opens correctly even if the upload object
    # has already been read.
    data = uploaded_pdf.getvalue()
    doc = fitz.open(stream=data, filetype="pdf")

    # Per-session state: current page plus the annotations collected so far.
    if "page_idx" not in st.session_state:
        st.session_state.page_idx = 0
    if "cropped_images" not in st.session_state:
        st.session_state.cropped_images = []
    if "captions" not in st.session_state:
        st.session_state.captions = []

    total_pages = doc.page_count
    if total_pages == 0:
        # The modulo arithmetic below would divide by zero on an empty PDF.
        st.error("The uploaded PDF has no pages.")
        st.stop()

    # Wrap the stored index so it stays valid for the current document.
    page_idx = st.session_state.page_idx % total_pages

    # Navigation buttons (placed above the image); both wrap around.
    col_prev, col_caption, col_next = st.columns([1, 8, 1])
    with col_prev:
        if st.button("<"):
            st.session_state.page_idx = (page_idx - 1) % total_pages
            st.rerun()
    with col_caption:
        # Page numbers are shown zero-based: 0 .. total_pages - 1.
        st.markdown(
            f"<center>Page {page_idx}/{total_pages - 1}</center>",
            unsafe_allow_html=True,
        )
    with col_next:
        # The label is a literal backslash followed by ">". It is written
        # with an escaped backslash because "\>" is an invalid escape
        # sequence in a normal string literal.
        if st.button("\\>"):
            st.session_state.page_idx = (page_idx + 1) % total_pages
            st.rerun()

    # Render the current page to a PIL image after the controls.
    page = doc.load_page(page_idx)
    pix = page.get_pixmap(dpi=200)
    image = Image.open(io.BytesIO(pix.tobytes("png")))

    # Interactive crop with a live preview of the selected region.
    cropped_image = st_cropper(image, realtime_update=True)
    st.image(cropped_image)

    caption = st.text_input("Caption", key="caption")
    if st.button("Save"):
        # Store the crop padded back to full page size, with its caption.
        padded_image = get_padded_image(image, cropped_image)
        st.session_state.cropped_images.append(padded_image)
        st.session_state.captions.append(caption)
        print(f"{len(st.session_state.cropped_images)=}")
        print(f"{st.session_state.cropped_images[-1].size=}")
        print(f"{st.session_state.captions[-1]=}")

    huggingface_dataset_address = st.sidebar.text_input(
        "Hugging Face Dataset Address"
    )
    if st.sidebar.button("Save to HuggingFace"):
        repo_id = huggingface_dataset_address.strip()
        if not repo_id:
            # Without a target repository there is nowhere to push to.
            st.sidebar.error("Enter a Hugging Face dataset address first.")
        elif not st.session_state.cropped_images:
            # Avoid pushing an empty dataset.
            st.sidebar.warning("No annotations saved yet; nothing to upload.")
        else:
            dataset = Dataset.from_dict(
                {
                    "image": st.session_state.cropped_images,
                    "caption": st.session_state.captions,
                },
            )
            dataset.push_to_hub(repo_id)
            st.success("Dataset saved to Hugging Face")