import functools

import numpy as np
import pandas as pd
from PIL import ImageDraw
from datasets import load_dataset, Image
# NOTE: this import intentionally comes after the ``datasets`` import so that
# the module-level name ``Image`` refers to ``PIL.Image`` (used by
# ``get_ocr_result``), exactly as in the original import order.
from PIL import Image

try:
    from paddleocr import PaddleOCR
except Exception as exc:  # best-effort optional dependency; keep the cause for later
    PaddleOCR = None
    _PADDLE_IMPORT_ERROR = exc
else:
    _PADDLE_IMPORT_ERROR = None

# Message shown when OCR is requested but paddleocr could not be imported.
_PADDLE_INSTALL_HINT = (
    'sudo apt install swig -y && pip install paddleocr==2.7.0.3 '
    'paddle-bfloat==0.1.7 paddlepaddle==2.5.2 protobuf==3.20.2'
)

# Spelled-out ordinals used when referring to an image by its 1-based number.
_ORDINAL_WORDS = {
    1: "first", 2: "second", 3: "third", 4: "fourth", 5: "fifth",
    6: "sixth", 7: "seventh", 8: "eighth", 9: "ninth", 10: "tenth",
    11: "eleventh", 12: "twelfth", 13: "thirteenth", 14: "fourteenth",
    15: "fifteenth", 16: "sixteenth", 17: "seventeenth", 18: "eighteenth",
    19: "nineteenth", 20: "twentieth",
}


@functools.lru_cache(maxsize=1)
def _get_paddle_ocr():
    """Build the PaddleOCR engine once and reuse it for later calls."""
    return PaddleOCR(lang='en', use_angle_cls=True, show_log=False)


def ocr_with_paddle(img):
    """Run PaddleOCR on ``img`` and flatten each detection.

    Args:
        img: Whatever ``PaddleOCR.ocr`` accepts; ``get_ocr_result`` passes an
            image file path.

    Returns:
        A list with one entry per detected text line. Each entry is the
        original detection with its trailing ``(text, score)`` pair expanded
        in place, i.e. ``[box, text, score]`` for the usual two-element
        detection. Empty when nothing was detected.

    Raises:
        ValueError: If ``paddleocr`` could not be imported. The message
            contains the install command and the original import failure,
            when there was one, is chained as the cause.
    """
    if PaddleOCR is None:
        raise ValueError(_PADDLE_INSTALL_HINT) from _PADDLE_IMPORT_ERROR

    result = _get_paddle_ocr().ocr(img)
    # The first element holds the detections for the (single) input image and
    # is None when no text was found.
    if not result or result[0] is None:
        return []
    return [[*line[:-1], line[-1][0], line[-1][1]] for line in result[0]]


def draw_boxes(image, bounds, color='yellow', width=2):
    """Draw the outline of every detection box onto ``image``.

    Args:
        image: A PIL image. It is drawn on in place.
        bounds: Iterable of detections whose first element is a sequence of
            four ``(x, y)`` corner points.
        color: Line colour passed to ``ImageDraw.line``.
        width: Line width in pixels.

    Returns:
        The same ``image`` object, for call chaining.
    """
    draw = ImageDraw.Draw(image)
    for bound in bounds:
        p0, p1, p2, p3 = bound[0]
        # Repeat the first corner at the end to close the quadrilateral.
        draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
    return image


def _grid_label(value, extent, labels):
    """Return the label of the third of ``[0, extent)`` that contains ``value``."""
    if value < extent / 3:
        return labels[0]
    if value < 2 * extent / 3:
        return labels[1]
    return labels[2]


def calculate_position(box, width, height):
    """Calculate the position of a bounding box within a 3x3 grid.

    Args:
        box: A list of coordinates representing the bounding box
            (e.g., [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]).
        width: The width of the image.
        height: The height of the image.

    Returns:
        A ``"<row>-<col>"`` string where row is one of ``top``, ``middle``,
        ``bottom`` and col is one of ``left``, ``center``, ``right``
        (e.g., "top-left", "middle-center", "bottom-right").
    """
    x_coords = [coord[0] for coord in box]
    y_coords = [coord[1] for coord in box]

    # The centre of the axis-aligned extent of the box decides the grid cell.
    center_x = (min(x_coords) + max(x_coords)) / 2
    center_y = (min(y_coords) + max(y_coords)) / 2

    row = _grid_label(center_y, height, ("top", "middle", "bottom"))
    col = _grid_label(center_x, width, ("left", "center", "right"))
    return f"{row}-{col}"


def process_dataframe(df, image_width, image_height, min_score=0.9):
    """Filter OCR detections by score and add a ``position`` column.

    Args:
        df: The input Pandas DataFrame with 'box', 'text', and 'score' columns.
        image_width: The width of the image.
        image_height: The height of the image.
        min_score: Rows are kept only when their score is strictly greater
            than this value. Defaults to 0.9.

    Returns:
        A new Pandas DataFrame containing the rows that passed the score
        filter, with an added 'position' column holding the grid cell
        computed by ``calculate_position``.
    """
    # .copy() so that adding a column does not trigger SettingWithCopyWarning.
    df_filtered = df[df['score'] > min_score].copy()
    df_filtered['position'] = df_filtered['box'].apply(
        lambda box: calculate_position(box, image_width, image_height)
    )
    return df_filtered


def _ordinal_word(number):
    """Return the ordinal for ``number``: a word for 1-20, else e.g. ``"21st"``."""
    word = _ORDINAL_WORDS.get(number)
    if word is not None:
        return word
    magnitude = abs(number)
    if 10 <= magnitude % 100 <= 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(magnitude % 10, "th")
    return f"{number}{suffix}"


def format_for_text_to_image_condensed(df, image_number):
    """Format detections into one condensed description for text-to-image models.

    Texts that share the same position are grouped into a single sentence, in
    the order in which each position first appears in ``df``.

    Args:
        df: DataFrame with 'position' and 'text' columns.
        image_number: 1-based number of the image. Numbers 1-20 are spelled
            out ("first" ... "twentieth"); any other integer falls back to
            its numeric ordinal (e.g. "21st") instead of failing.

    Returns:
        The description string, or ``''`` when ``df`` has no rows.
    """
    if df.empty:
        return ''

    ordinal = _ordinal_word(image_number)

    # Group texts by position, preserving first-seen order of the positions.
    position_to_texts = {}
    for position, text in zip(df['position'], df['text']):
        position_to_texts.setdefault(position, []).append(text)

    sentences = [f'In the {ordinal} image: (']
    for position, texts in position_to_texts.items():
        text_string = ", ".join(f'"{text}"' for text in texts)
        sentences.append(
            f"The texts {text_string} are located at the {position} of the {ordinal} image."
        )
    return " ".join(sentences) + ' )'


def get_ocr_result(img_path: str, img_index: int = 0):
    """Run OCR on an image file and describe where its texts are located.

    Args:
        img_path: Path of the image file.
        img_index: 0-based index of the image; converted to a 1-based number
            for the wording of the description.

    Returns:
        The sentence built by ``format_for_text_to_image_condensed``, or
        ``''`` when no detection passes the score filter.

    Raises:
        ValueError: If ``paddleocr`` is not available.
    """
    image_number = img_index + 1

    ocr_result = ocr_with_paddle(img_path)
    ocr_result_df = pd.DataFrame(ocr_result, columns=['box', 'text', 'score'])

    # Only the dimensions are needed; close the file handle right away.
    with Image.open(img_path) as image:
        image_width, image_height = image.size

    df_processed = process_dataframe(ocr_result_df, image_width, image_height)
    return format_for_text_to_image_condensed(df_processed, image_number=image_number)