"""Streamlit app for cropping figures out of PDF pages and captioning them.

A PDF is uploaded through the sidebar, one page at a time is rasterized and
shown in an interactive cropper, and every saved crop (padded back to the page
size) is kept with its caption in the Streamlit session state.  The collected
image/caption pairs can then be pushed to the Hugging Face Hub as a dataset.
"""

import io

import fitz
import streamlit as st
from datasets import Dataset
from PIL import Image
from streamlit_cropper import st_cropper


def get_padded_image(image: Image.Image, cropped_image: Image.Image) -> Image.Image:
    """Return ``cropped_image`` centered on a white canvas the size of ``image``.

    Args:
        image: Image that defines the size and mode of the output canvas.
        cropped_image: Image pasted onto the canvas; it is converted to
            ``image.mode`` first when the two modes differ.

    Returns:
        A new image with the same size and mode as ``image``.
    """
    base_w, base_h = image.size
    crop_w, crop_h = cropped_image.size

    # Match modes to avoid paste issues
    crop_img = (
        cropped_image.convert(image.mode)
        if cropped_image.mode != image.mode
        else cropped_image
    )

    # Create white background canvas matching the original image size
    padded_image = Image.new(image.mode, (base_w, base_h), color="white")

    # Center the crop; clamp at 0 so an oversized crop is anchored top-left
    paste_x = max(0, (base_w - crop_w) // 2)
    paste_y = max(0, (base_h - crop_h) // 2)
    padded_image.paste(crop_img, (paste_x, paste_y))
    return padded_image


def _render_navigation(page_idx: int, total_pages: int) -> None:
    """Draw the previous/next buttons and the page label above the page image.

    Clicking a button stores the new (wrapped-around) page index in the
    session state and immediately reruns the script.
    """
    col_prev, col_caption, col_next = st.columns([1, 8, 1])
    with col_prev:
        if st.button("<"):
            st.session_state.page_idx = (page_idx - 1) % total_pages
            st.rerun()
    with col_caption:
        # Single-line literal; the <div> centers the label in the middle
        # column, which is what unsafe_allow_html is needed for.
        st.markdown(
            f"<div style='text-align: center'>"
            f"Page {page_idx}/{total_pages - 1}"
            f"</div>",
            unsafe_allow_html=True,
        )
    with col_next:
        # Button labels are rendered as Markdown; the escaped backslash keeps
        # the runtime label as "\>" without an invalid Python escape sequence.
        if st.button("\\>"):
            st.session_state.page_idx = (page_idx + 1) % total_pages
            st.rerun()


def _render_page(doc, page_idx: int, dpi: int = 200) -> Image.Image:
    """Rasterize page ``page_idx`` of the open PDF ``doc`` into a PIL image."""
    pix = doc.load_page(page_idx).get_pixmap(dpi=dpi)
    # The PNG bytes are independent of the document, so the returned image
    # stays usable after the document has been closed.
    return Image.open(io.BytesIO(pix.tobytes("png")))


st.header("Line Art Data Annotation App")

# Initialise the annotation state unconditionally so that the export section
# at the bottom can always read it, whether or not a PDF is currently loaded.
for _key, _default in (("page_idx", 0), ("cropped_images", []), ("captions", [])):
    if _key not in st.session_state:
        st.session_state[_key] = _default

uploaded_pdf = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_pdf:
    image = None
    # getvalue() returns the whole upload regardless of the stream position;
    # the context manager closes the document on every exit path, including
    # the rerun triggered by the navigation buttons.
    with fitz.open(stream=uploaded_pdf.getvalue(), filetype="pdf") as doc:
        total_pages = doc.page_count
        if total_pages > 0:
            # The modulo keeps a stale index valid after a shorter PDF is loaded
            page_idx = st.session_state.page_idx % total_pages

            # Navigation buttons (placed above the image)
            _render_navigation(page_idx, total_pages)

            # Render image after controls
            image = _render_page(doc, page_idx)

    if image is None:
        st.error("The uploaded PDF does not contain any pages.")
    else:
        cropped_image = st_cropper(image, realtime_update=True)
        st.image(cropped_image)

        caption = st.text_input("Caption", key="caption")
        if st.button("Save"):
            padded_image = get_padded_image(image, cropped_image)
            st.session_state.cropped_images.append(padded_image)
            st.session_state.captions.append(caption)
            print(f"{len(st.session_state.cropped_images)=}")
            print(f"{st.session_state.cropped_images[-1].size=}")
            print(f"{st.session_state.captions[-1]=}")

huggingface_dataset_address = st.sidebar.text_input("Hugging Face Dataset Address")
if st.sidebar.button("Save to HuggingFace"):
    repo_id = huggingface_dataset_address.strip()
    if not repo_id:
        # Pushing with an empty repository id can only fail; tell the user why
        st.sidebar.error("Please enter a Hugging Face dataset address first.")
    elif not st.session_state.cropped_images:
        st.sidebar.warning("There are no saved crops to upload yet.")
    else:
        dataset = Dataset.from_dict(
            {
                "image": st.session_state.cropped_images,
                "caption": st.session_state.captions,
            },
        )
        dataset.push_to_hub(repo_id)
        st.success("Dataset saved to Hugging Face")