Spaces:
Running
Running
File size: 9,814 Bytes
a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 a383d0e 0246ff9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
from utils import encode_image, Doubao, Qwen_2_5_VL
from PIL import Image
import bs4
from threading import Thread
import time
import argparse
import json
import os
# Per-run user instructions, keyed by component type. Every entry starts out
# as an empty string; main() merges the --instructions JSON into this dict.
user_instruction = dict.fromkeys(
    ("sidebar", "header", "navigation", "main content"), ""
)
def get_args():
    """Parse the script's command-line arguments.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``, carrying
        ``run_id`` (required string) and ``instructions`` (optional string,
        ``None`` when the flag is omitted).
    """
    cli = argparse.ArgumentParser(
        description="Generates an HTML layout from bounding box data."
    )
    # Flag name -> keyword arguments for add_argument, registered in order.
    flag_specs = (
        (
            '--run_id',
            {
                "type": str,
                "required": True,
                "help": 'A unique identifier for the processing run.',
            },
        ),
        (
            '--instructions',
            {
                "type": str,
                "help": 'A JSON string of instructions for different components.',
            },
        ),
    )
    for flag, spec in flag_specs:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
def get_prompt_dict(instructions):
    """Dynamically creates the prompt dictionary with the user's instructions.

    Args:
        instructions: Mapping from component type ("sidebar", "header",
            "navigation", "main content") to the user's extra instruction
            text. A missing key or a ``None`` value is treated as an empty
            instruction, and passing ``None`` for the whole mapping means
            "no instructions at all".

    Returns:
        A dict with the four component types as keys (in the order listed
        above) and the complete prompt text for each as values.
    """
    instructions = instructions or {}

    # Text shared by every prompt, before and after the per-component part.
    intro = (
        "This is a screenshot of a container. Please fill in a complete HTML "
        "and tail-wind CSS code to accurately reproduce the given container. "
    )
    outro = (
        " The following is the code for filling in:\n"
        "<div>\n"
        "your code here\n"
        "</div>,\n"
        "only return the code within the <div> and </div> tags"
    )

    # Sentence tail reused by the header, navigation and main-content prompts.
    consistency = (
        "relative position, layout, text information, and color of all blocks "
        "in the boundary box need to be basically consistent with the original "
        "screenshot based on the user's additional conditions."
    )
    requirements = {
        "sidebar": (
            "Please note that the layout, icon style, size, and text "
            "information of all blocks need to be basically consistent with "
            "the original screenshot based on the user's additional conditions."
        ),
        "header": f"Please note that the {consistency}",
        "navigation": (
            f"Please note that the {consistency} "
            "Please use the same icons as in the original screenshot."
        ),
        "main content": (
            "Please note that all images displayed in the screenshot must be "
            "replaced with pure gray-400 image blocks of the same size as the "
            "corresponding images in the original screenshot, and the text "
            "information in the images does not need to be recognized. "
            f"The {consistency}"
        ),
    }

    prompts = {}
    for component, requirement in requirements.items():
        # .get() instead of indexing so a partial mapping does not raise
        # KeyError; None is mapped to "" so it is not rendered as "None".
        text = instructions.get(component)
        if text is None:
            text = ""
        prompts[component] = (
            f"{intro}{requirement} User instruction: {text}.{outro}"
        )
    return prompts
def generate_code(bbox_tree, img_path, bot, instructions):
    """Generates code for each leaf node in the bounding box tree.

    Walks ``bbox_tree`` depth-first. For every leaf (a node whose
    ``"children"`` entry is missing or empty) whose ``"type"`` has a prompt,
    the leaf's ``"bbox"`` region is cropped from the image and sent to
    ``bot.ask`` together with that prompt.

    Args:
        bbox_tree: Root node dict. Nodes are read through the keys
            ``"children"``, ``"bbox"``, ``"type"`` and ``"id"``.
        img_path: Path of the screenshot the bounding boxes refer to.
        bot: Object exposing ``ask(prompt, encoded_image)``.
        instructions: User instructions forwarded to ``get_prompt_dict``.

    Returns:
        dict mapping the ``"id"`` of each successfully processed leaf to the
        value returned by ``bot.ask``. Leaves with an unknown type, and leaves
        whose request raised, are reported on stdout and left out.
    """
    code_dict = {}
    prompt_dict = get_prompt_dict(instructions)

    def _visit(node, img):
        children = node.get("children")
        if children:
            for child in children:
                _visit(child, img)
            return

        # Leaf node: validate the type first so no crop is done for nodes
        # that would be rejected anyway.
        node_type = node.get("type")
        if not (node_type and node_type in prompt_dict):
            print(f"Node type '{node_type}' not found or invalid.")
            return

        cropped_img = img.crop(node["bbox"])
        prompt = prompt_dict[node_type]
        try:
            code = bot.ask(prompt, encode_image(cropped_img))
            code_dict[node["id"]] = code
        except Exception as e:
            # Best effort: one failed request must not abort the other nodes.
            print(f"Error generating code for {node_type}: {e}")

    # The context manager closes the image file once every crop has been
    # taken; the original left the file handle open.
    with Image.open(img_path) as img:
        _visit(bbox_tree, img)
    return code_dict
# Page skeleton emitted before the generated boxes.
_HTML_TEMPLATE_START = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Bounding Boxes Layout</title>
<style>
body, html {
margin: 0;
padding: 0;
width: 100%;
height: 100%;
}
.container {
position: relative;
width: 100%;
height: 100%;
box-sizing: border-box;
}
.box {
position: absolute;
box-sizing: border-box;
overflow: hidden;
}
.box > .container {
display: grid;
width: 100%;
height: 100%;
}
</style>
<link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
</head>
<body>
<div class="container">
"""

# Closing markup emitted after the generated boxes.
_HTML_TEMPLATE_END = """
</div>
</body>
</html>
"""


def _percent(length, total):
    """Return ``length`` as a percentage of ``total`` (0.0 if ``total`` is 0)."""
    if not total:
        # A zero-sized parent would otherwise raise ZeroDivisionError.
        return 0.0
    return length / total * 100


def _render_box(node, parent_width, parent_height, parent_left, parent_top):
    """Return the ``<div class="box">`` markup for ``node`` and its subtree."""
    x1, y1, x2, y2 = node['bbox'][:4]
    node_id = node['id']

    # Position and size are expressed relative to the parent box.
    left = _percent(x1 - parent_left, parent_width)
    top = _percent(y1 - parent_top, parent_height)
    width = _percent(x2 - x1, parent_width)
    height = _percent(y2 - y1, parent_height)

    parts = [
        f'<div id="{node_id}" class="box" style="left: {left}%; top: {top}%; '
        f'width: {width}%; height: {height}%;">'
    ]
    children = node.get('children', [])
    if children:
        parts.append('<div class="container">')
        parts.extend(
            _render_box(child, x2 - x1, y2 - y1, x1, y1) for child in children
        )
        parts.append('</div>')
    parts.append('</div>')
    return "".join(parts)


def generate_html(bbox_tree, output_file):
    """Generates an HTML file with nested containers based on the bounding box tree.

    Every node below the root becomes a ``<div class="box">`` carrying the
    node's ``"id"``, positioned and sized in percent of its parent's bounding
    box. The root itself is represented by the page-level container.

    Args:
        bbox_tree: Root node dict with a ``"bbox"`` of ``[x1, y1, x2, y2]``
            and an optional ``"children"`` list of nodes that each have
            ``"bbox"``, ``"id"`` and optionally ``"children"``.
        output_file: Path of the HTML file to write (overwritten if present).
    """
    root_left, root_top, root_right, root_bottom = bbox_tree['bbox'][:4]
    root_width = root_right - root_left
    root_height = root_bottom - root_top

    boxes = "".join(
        _render_box(child, root_width, root_height, root_left, root_top)
        for child in bbox_tree.get('children', [])
    )
    html_content = _HTML_TEMPLATE_START + boxes + _HTML_TEMPLATE_END

    # The page declares charset UTF-8, so write it as UTF-8 rather than in
    # the platform's default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(bs4.BeautifulSoup(html_content, 'html.parser').prettify())
def code_substitution(html_file, code_dict):
    """Substitutes the generated code into the HTML file.

    For each ``node_id -> code`` pair, the element whose ``id`` attribute
    equals ``node_id`` is looked up in ``html_file``. The code, with Markdown
    code-fence markers removed, is parsed and appended to that element, and
    the file is then rewritten in place.

    Args:
        html_file: Path of the HTML file to update.
        code_dict: Mapping from node id to the generated HTML snippet. Entries
            whose value is not a string are reported on stdout and skipped.
    """
    # Read and write as UTF-8 explicitly: generated snippets may contain
    # non-ASCII text that the platform default encoding cannot represent.
    with open(html_file, "r", encoding="utf-8") as f:
        soup = bs4.BeautifulSoup(f.read(), 'html.parser')

    for node_id, code in code_dict.items():
        if not isinstance(code, str):
            # Calling .replace on a missing/None response would crash the run.
            print(f"Skipping node {node_id}: no generated code to insert.")
            continue
        # HTML id attributes are strings, so compare against the string form.
        div = soup.find(id=str(node_id))
        if div is None:
            continue
        snippet = code.replace("```html", "").replace("```", "")
        div.append(bs4.BeautifulSoup(snippet, 'html.parser'))

    with open(html_file, "w", encoding="utf-8") as f:
        f.write(soup.prettify())
def main():
    """Run the HTML generation pipeline for one ``run_id``.

    Steps:
        1. Parse the CLI arguments and merge any ``--instructions`` JSON
           object into ``user_instruction``.
        2. Locate ``data/tmp/<run_id>/<run_id>_bboxes.json`` and
           ``data/tmp/<run_id>/<run_id>.png`` next to this script.
        3. Convert the bounding boxes to pixel coordinates and build a
           one-level tree under a root that spans the whole image.
        4. Write the layout HTML, ask the model for code per component and
           substitute the results into the layout.

    Raises:
        SystemExit: With status 1 when the input files or the API key file
            are missing.
    """
    args = get_args()
    if args.instructions:
        try:
            parsed = json.loads(args.instructions)
        except json.JSONDecodeError:
            print("Error: Could not decode instructions JSON.")
        else:
            # Valid JSON that is not an object (a list, a number, ...) would
            # make dict.update raise, so it is rejected explicitly.
            if isinstance(parsed, dict):
                user_instruction.update(parsed)
            else:
                print("Error: Instructions JSON must be an object.")

    # --- Dynamic Path Construction ---
    base_dir = os.path.dirname(os.path.abspath(__file__))
    tmp_dir = os.path.join(base_dir, 'data', 'tmp', args.run_id)
    output_dir = os.path.join(base_dir, 'data', 'output', args.run_id)
    os.makedirs(output_dir, exist_ok=True)
    input_json_path = os.path.join(tmp_dir, f"{args.run_id}_bboxes.json")
    img_path = os.path.join(tmp_dir, f"{args.run_id}.png")
    output_html_path = os.path.join(output_dir, f"{args.run_id}_layout.html")

    if not os.path.exists(input_json_path) or not os.path.exists(img_path):
        print("Error: Input bbox JSON or image file not found.")
        # SystemExit instead of exit(): the latter is injected by the `site`
        # module and is not guaranteed to be available.
        raise SystemExit(1)

    print(f"--- Starting HTML Generation for run_id: {args.run_id} ---")
    with open(input_json_path, 'r', encoding='utf-8') as f:
        boxes_data = json.load(f)
    with Image.open(img_path) as img:
        width, height = img.size

    root = {"bbox": [0, 0, width, height], "children": [], "id": 0}
    # Convert normalized bboxes to pixel coordinates (the division by 1000
    # implies the stored values are scaled to a 0-1000 range) and give every
    # node a unique id, starting at 1, for the later code substitution.
    for node_id, (name, norm_bbox) in enumerate(boxes_data.items(), start=1):
        x1 = int(norm_bbox[0] * width / 1000)
        y1 = int(norm_bbox[1] * height / 1000)
        x2 = int(norm_bbox[2] * width / 1000)
        y2 = int(norm_bbox[3] * height / 1000)
        root["children"].append(
            {"bbox": [x1, y1, x2, y2], "type": name, "children": [], "id": node_id}
        )

    generate_html(root, output_html_path)

    api_path = os.path.join(base_dir, "doubao_api.txt")
    if not os.path.exists(api_path):
        print(f"Error: API key not found at {api_path}")
        raise SystemExit(1)

    bot = Doubao(api_path, model="doubao-1.5-thinking-vision-pro-250428")
    code_dict = generate_code(root, img_path, bot, user_instruction)
    code_substitution(output_html_path, code_dict)

    print(f"HTML layout with generated content saved to {os.path.basename(output_html_path)}")
    print(f"--- HTML Generation Complete for run_id: {args.run_id} ---")
# Run the pipeline only when the file is executed as a script.
if __name__ == "__main__":
    main()