Spaces:

Jimmyzheng-10
/

ScreenCoder

Running

App Files Files Community

ScreenCoder / screencoder /UIED /run_single.py

Jimmyzheng-10

Update

0246ff9 21 days ago

raw

history blame contribute delete

5.7 kB

	import cv2
	import os
	import numpy as np
	import multiprocessing
	import argparse
	from os.path import join as pjoin

	def get_args():
	    """Parse the command-line options for a single-image run.

	    Reads the process arguments and requires exactly one option,
	    ``--run_id``. argparse itself terminates the process with a usage
	    message when the option is missing.

	    Returns:
	        argparse.Namespace: namespace whose ``run_id`` attribute holds the
	        string passed on the command line.
	    """
	    cli = argparse.ArgumentParser(
	        description="Processes a single image for UI element detection.",
	    )
	    cli.add_argument(
	        '--run_id',
	        required=True,
	        type=str,
	        help='A unique identifier for the processing run.',
	    )
	    options = cli.parse_args()
	    return options

	def resize_height_by_longest_edge(img_path, resize_length=800):
	    """Return the height to resize to so the longest edge equals ``resize_length``.

	    The image is loaded only to read its dimensions; nothing is resized
	    here.

	    Args:
	        img_path: Path of the image file to inspect.
	        resize_length: Desired length, in pixels, of the longest edge.

	    Returns:
	        ``resize_length`` itself when the image is taller than it is wide;
	        otherwise ``resize_length`` scaled by ``height / width`` and
	        truncated to an int.

	    Raises:
	        ValueError: If OpenCV cannot load ``img_path`` (missing, unreadable
	            or not a decodable image).
	    """
	    org = cv2.imread(img_path)
	    # cv2.imread signals failure by returning None instead of raising, which
	    # would otherwise surface as an opaque AttributeError on ``.shape``.
	    if org is None:
	        raise ValueError(f"Could not read image at {img_path}")

	    height, width = org.shape[:2]
	    if height > width:
	        # Portrait: the height is already the longest edge.
	        return resize_length
	    # Landscape or square: the width becomes resize_length, so scale the
	    # height by the aspect ratio.
	    return int(resize_length * (height / width))


	def color_tips():
	    """Display a 200x200 legend window mapping colours to element categories.

	    The legend is four horizontal 50-pixel bands, top to bottom: Text,
	    non-text component, a component's text content, and Block. Each band is
	    filled with the category colour from ``color_map`` and labelled in
	    black.

	    The window is opened with ``cv2.imshow`` only; this function does not
	    call ``cv2.waitKey``, so the caller is responsible for pumping the GUI
	    event loop if the window needs to stay visible.
	    """
	    # Colour per category (channel order as used by OpenCV drawing calls).
	    color_map = {'Text': (0, 0, 255), 'Compo': (0, 255, 0), 'Block': (0, 255, 255), 'Text Content': (255, 0, 255)}

	    # (category key, caption drawn on the band), in top-to-bottom order.
	    legend = (
	        ('Text', 'Text'),
	        ('Compo', 'Non-text Compo'),
	        ('Text Content', "Compo's Text Content"),
	        ('Block', 'Block'),
	    )
	    band_height = 50

	    board = np.zeros((band_height * len(legend), 200, 3), dtype=np.uint8)
	    for row, (category, caption) in enumerate(legend):
	        top = row * band_height
	        board[top:top + band_height, :, :] = color_map[category]
	        # Caption baseline sits 20 px below the top of its band.
	        cv2.putText(board, caption, (10, top + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
	    cv2.imshow('colors', board)


	if __name__ == '__main__':
	    # Script entry point: locate the input image for the given run_id and run
	    # the enabled UIED stages (OCR, component detection, merge) on it, writing
	    # all results under the run's temporary directory.

	    # Local import: sys is only needed here, for a reliable process exit.
	    import sys

	    args = get_args()

	    # --- Dynamic Path Construction ---
	    # Construct paths based on the provided run_id
	    base_dir = os.path.dirname(os.path.abspath(__file__))
	    run_id = args.run_id

	    # The temporary directory for this specific run
	    tmp_dir = os.path.join(base_dir, '..', 'data', 'tmp', run_id)

	    # Input image path
	    input_path_img = os.path.join(tmp_dir, f"{run_id}.png")

	    # Output directory for this script's results
	    output_root = tmp_dir  # All results (ip, ocr, etc.) will go into the run's tmp subdir.

	    if not os.path.exists(input_path_img):
	        print(f"Error: Input image not found at {input_path_img}")
	        # sys.exit rather than the site-provided exit(), which is missing
	        # under `python -S` and in frozen executables.
	        sys.exit(1)

	    print(f"--- Starting UIED processing for run_id: {run_id} ---")
	    print(f"Input image: {input_path_img}")
	    print(f"Output root: {output_root}")

	    # Set multiprocessing start method to 'spawn' for macOS compatibility.
	    # This has to happen inside the main guard, before any worker process
	    # or pool is created.
	    try:
	        multiprocessing.set_start_method('spawn', force=True)
	    except RuntimeError:
	        pass  # It's OK if it's already set.

	    # Intended to disable multiprocessing in PaddleOCR to avoid a segmentation
	    # fault on macOS. It is set before the detection modules are imported below.
	    os.environ['PADDLE_USE_MULTIPROCESSING'] = '0'

	    # Parameter reference:
	    #   ele:min-grad: gradient threshold to produce binary map
	    #   ele:ffl-block: fill-flood threshold
	    #   ele:min-ele-area: minimum area for selected elements
	    #   ele:merge-contained-ele: if True, merge elements contained in others
	    #   text:max-word-inline-gap: words with smaller distance than the gap are counted as a line
	    #   text:max-line-gap: lines with smaller distance than the gap are counted as a paragraph
	    #
	    # Tips:
	    #   1. Larger min-grad produces fine-grained binary-map while prone to over-segment element to small pieces
	    #   2. Smaller min-ele-area leaves tiny elements while prone to produce noises
	    #   3. If not merge-contained-ele, the elements inside others will be recognized, while prone to produce noises
	    #   4. The max-word-inline-gap and max-line-gap should be dependent on the input image size and resolution
	    #
	    # Presets suggested by the original authors:
	    #   mobile: {'min-grad':4, 'ffl-block':5, 'min-ele-area':50, 'max-word-inline-gap':6, 'max-line-gap':1}
	    #   web   : {'min-grad':3, 'ffl-block':5, 'min-ele-area':25, 'max-word-inline-gap':4, 'max-line-gap':4}
	    key_params = {'min-grad':10, 'ffl-block':5, 'min-ele-area':50,
	                  'merge-contained-ele':True, 'merge-line-to-paragraph':False, 'remove-bar':True}

	    resized_height = resize_height_by_longest_edge(input_path_img, resize_length=800)
	    # color_tips() # This shows a window, which is not suitable for a script.

	    # Stage switches: only component detection (ip) is enabled.
	    is_ip = True
	    is_clf = False
	    is_ocr = False
	    is_merge = False

	    if is_ocr:
	        import detect_text.text_detection as text
	        os.makedirs(pjoin(output_root, 'ocr'), exist_ok=True)
	        # NOTE(review): show=True here, unlike the other stages which pass
	        # show=False - confirm this is intended before enabling is_ocr.
	        text.text_detection(input_path_img, output_root, show=True, method='paddle')

	    if is_ip:
	        import detect_compo.ip_region_proposal as ip
	        os.makedirs(pjoin(output_root, 'ip'), exist_ok=True)
	        # switch of the classification func
	        classifier = None
	        if is_clf:
	            classifier = {}
	            from cnn.CNN import CNN
	            # classifier['Image'] = CNN('Image')
	            classifier['Elements'] = CNN('Elements')
	            # classifier['Noise'] = CNN('Noise')
	        ip.compo_detection(input_path_img, output_root, key_params,
	                           classifier=classifier, resize_by_height=resized_height, show=False)

	    if is_merge:
	        import detect_merge.merge as merge
	        os.makedirs(pjoin(output_root, 'merge'), exist_ok=True)
	        # File stem of the input image, derived portably (works with any
	        # path separator and any extension length).
	        name = os.path.splitext(os.path.basename(input_path_img))[0]
	        compo_path = pjoin(output_root, 'ip', str(name) + '.json')
	        ocr_path = pjoin(output_root, 'ocr', str(name) + '.json')
	        merge.merge(input_path_img, compo_path, ocr_path, pjoin(output_root, 'merge'),
	                    is_remove_bar=key_params['remove-bar'], is_paragraph=key_params['merge-line-to-paragraph'], show=False)

	    print(f"--- UIED processing complete for run_id: {run_id} ---")