Jimmyzheng-10 committed on
Commit
1cc14d1
·
1 Parent(s): 0246ff9
app.py CHANGED
@@ -3,9 +3,15 @@ import os
3
  import tempfile
4
  import cv2
5
  import numpy as np
 
6
  from screencoder.main import generate_html_for_demo
 
 
 
 
 
7
 
8
- # Manually defined examples
9
  examples_data = [
10
  [
11
  "screencoder/data/input/test1.png",
@@ -33,97 +39,251 @@ examples_data = [
33
  ],
34
  ]
35
 
36
- def process_image_and_prompt(image_np, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  final_image_path = ""
38
  is_temp_file = False
39
-
40
  if image_path_from_state:
41
  final_image_path = image_path_from_state
42
- print(f"Processing example image from: {final_image_path}")
43
  elif image_np is not None:
44
  is_temp_file = True
45
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
46
- image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
47
- cv2.imwrite(tmp.name, image_bgr)
48
  final_image_path = tmp.name
49
- print(f"Processing uploaded image from temporary file: {final_image_path}")
50
  else:
51
- return "<html><body><h1 style='font-family: sans-serif; text-align: center; margin-top: 40px;'>Please provide an image.</h1></body></html>", ""
52
 
53
  instructions = {
54
- "sidebar": sidebar_prompt,
55
- "header": header_prompt,
56
- "navigation": navigation_prompt,
57
- "main content": main_content_prompt
58
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- print(f"With instructions: {instructions}")
61
- html_content = generate_html_for_demo(final_image_path, instructions)
 
 
 
 
62
 
63
  if is_temp_file:
64
  os.unlink(final_image_path)
65
-
66
- return html_content, html_content
67
 
68
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), fill_height=True) as demo:
69
- active_image_path_state = gr.State(value=examples_data[0][5] if examples_data else None)
70
 
 
71
  gr.Markdown("# ScreenCoder: Screenshot to Code")
72
-
73
- with gr.Row(equal_height=True):
74
  with gr.Column(scale=1):
75
  gr.Markdown("### Step 1: Provide an Image")
76
-
77
- active_image = gr.Image(
78
- type="numpy",
79
- height=300,
80
- value=examples_data[0][0] if examples_data else None
81
- )
82
-
83
- upload_button = gr.UploadButton("Click to Upload or Drag-and-Drop", file_types=["image"], variant="primary")
84
 
85
  gr.Markdown("### Step 2: Write Prompts (Optional)")
86
- with gr.Accordion("Component-specific Prompts", open=True):
87
- sidebar_prompt = gr.Textbox(label="Sidebar Prompt", placeholder="Instructions for the sidebar...", value="")
88
- header_prompt = gr.Textbox(label="Header Prompt", placeholder="Instructions for the header...", value="")
89
- navigation_prompt = gr.Textbox(label="Navigation Prompt", placeholder="Instructions for the navigation...", value="")
90
- main_content_prompt = gr.Textbox(label="Main Content Prompt", placeholder="Instructions for the main content...", value="")
91
 
92
- generate_btn = gr.Button("Generate HTML", variant="primary", scale=2)
93
 
94
  with gr.Column(scale=2):
95
  with gr.Tabs():
96
  with gr.TabItem("Preview"):
97
- html_preview = gr.HTML(label="Live Preview", elem_id="html-preview")
 
 
 
 
 
 
98
  with gr.TabItem("Code"):
99
- html_code_output = gr.Code(label="Generated Code", language="html", elem_id="html-code")
 
 
 
 
 
 
 
 
 
 
100
 
101
- if examples_data:
102
- gr.Examples(
103
- examples=examples_data,
104
- fn=lambda *args: args, # Simply return all inputs
105
- inputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
106
- outputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt, active_image_path_state],
107
- label="Click an example to try it out",
108
- cache_examples=False,
109
- )
110
-
111
- def handle_upload(uploaded_image_np):
112
- """On upload, update image, clear state, and set empty prompts."""
113
- return uploaded_image_np, None, "", "", "", ""
114
 
115
- upload_button.upload(
116
- fn=handle_upload,
117
- inputs=upload_button,
118
- outputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt]
119
  )
 
 
 
 
 
120
 
121
  generate_btn.click(
122
- fn=process_image_and_prompt,
123
- inputs=[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
124
- outputs=[html_preview, html_code_output],
125
  show_progress="full"
126
  )
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  if __name__ == "__main__":
129
- demo.launch()
 
3
  import tempfile
4
  import cv2
5
  import numpy as np
6
+ import urllib.parse
7
  from screencoder.main import generate_html_for_demo
8
+ from PIL import Image
9
+ import shutil
10
+ import html
11
+ import base64
12
+ from bs4 import BeautifulSoup
13
 
14
+ # Predefined examples
15
  examples_data = [
16
  [
17
  "screencoder/data/input/test1.png",
 
39
  ],
40
  ]
41
 
42
+ TAILWIND_SCRIPT = "<script src='https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4'></script>"
43
+
44
def image_to_data_url(image_path):
    """Convert an image file to a base64 ``data:`` URL for embedding in HTML.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        A ``data:<mime>;base64,<payload>`` string, or ``None`` when the path
        is empty or the file cannot be read.
    """
    if not image_path:
        return None

    # The MIME type is chosen from the file extension; anything unrecognised
    # is labelled as PNG, matching the previous behaviour.
    mime_types = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
        # SVG must carry its own MIME type or browsers refuse to render it.
        '.svg': 'image/svg+xml',
    }

    try:
        ext = os.path.splitext(image_path)[1].lower()
        with open(image_path, 'rb') as img_file:
            img_data = img_file.read()
    except (OSError, TypeError, ValueError) as e:
        # OSError: missing/unreadable file; TypeError/ValueError: unusable path value.
        print(f"Error converting image to data URL: {e}")
        return None

    mime_type = mime_types.get(ext, 'image/png')
    encoded = base64.b64encode(img_data).decode('ascii')
    return f'data:{mime_type};base64,{encoded}'
64
+
65
def render_preview(code: str, width: int, height: int, scale: float) -> str:
    """Wrap generated HTML in a scaled, fixed-size iframe for the preview tab.

    Args:
        code: HTML document to display. ``None`` is treated as an empty
            document so the preview controls can be moved before anything
            has been generated.
        width: Width of the inner canvas in pixels.
        height: Height of the inner canvas in pixels.
        scale: CSS scale factor applied to the inner canvas.

    Returns:
        An HTML snippet containing an ``<iframe>`` whose ``srcdoc`` holds the
        escaped ``code``.
    """
    # html.escape() encodes &, <, > and both quote styles by default, so the
    # result is safe inside the single-quoted srcdoc attribute below.
    safe_code = html.escape(code or "")
    iframe_html = f"""
    <div style="width: 100%; max-width: 1920px; margin: 0 auto; overflow-x: auto; overflow-y: hidden;">
        <div style="
            width: 1920px;
            height: 1000px;
            margin: 0 auto;
            display: flex;
            justify-content: center;
            align-items: center;
            border: 1px solid #ddd;
            overflow: hidden;
            background: #f9fafb;
            position: relative;
            box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
            <div style="
                width: {width}px;
                height: {height}px;
                transform: scale({scale});
                transform-origin: top left;
                border: none;
                position: relative;">
                <iframe
                    style="width: 100%; height: 100%; border: none; display: block;"
                    srcdoc='{safe_code}'>
                </iframe>
            </div>
        </div>
    </div>
    """
    return iframe_html
100
+
101
def _strip_relative_prefix(src):
    """Drop leading ``./`` and ``../`` segments from a relative image URL."""
    while src.startswith(('./', '../')):
        src = src.split('/', 1)[1]
    return src


def _locate_generated_image(base_dir, run_id, src):
    """Return the on-disk path for an ``<img src>`` of the generated page, or None."""
    if src.startswith('/'):
        # Root-relative sources are resolved against the application directory.
        candidate = os.path.join(base_dir, src.lstrip('/'))
        return candidate if os.path.exists(candidate) else None

    run_dir = os.path.join(base_dir, 'screencoder', 'data', 'output', run_id)
    candidates = [
        os.path.join(run_dir, src),
        os.path.join(run_dir, 'cropped_images', src),
        os.path.join(run_dir, f'cropped_images_{run_id}', src),
        os.path.join(run_dir, os.path.basename(src)),
        os.path.join(run_dir, _strip_relative_prefix(src)),
    ]
    return next((path for path in candidates if os.path.exists(path)), None)


def _inline_local_images(html_content, base_dir, run_id):
    """Rewrite local ``<img>`` sources as data URLs so they load inside the preview iframe."""
    soup = BeautifulSoup(html_content, 'html.parser')
    for img in soup.find_all('img'):
        original_src = img.get('src')
        # Remote and already-inlined images need no rewriting.
        if not original_src or original_src.startswith(('http', 'data:')):
            continue

        img_path = _locate_generated_image(base_dir, run_id, original_src)
        if img_path is None:
            # Leave the source untouched when the file cannot be found.
            print(f"Could not find image: {original_src}")
            continue

        data_url = image_to_data_url(img_path)
        # Fall back to Gradio's file route if the file could not be encoded.
        img['src'] = data_url if data_url else f'/file={img_path}'
    return str(soup)


def process_and_generate(image_np, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
    """Generate HTML from a screenshot and prepare the preview and download package.

    Args:
        image_np: Image currently shown in the image component (RGB array), or None.
        image_path_from_state: Path of the selected example image; takes
            precedence over ``image_np`` when set.
        sidebar_prompt: Extra instruction for the sidebar component.
        header_prompt: Extra instruction for the header component.
        navigation_prompt: Extra instruction for the navigation component.
        main_content_prompt: Extra instruction for the main content component.

    Returns:
        A 3-tuple matching the three outputs bound to the generate button:
        preview HTML, generated HTML code, and an update for the download
        button. Every return path yields exactly these three values.
    """
    final_image_path = ""
    is_temp_file = False

    if image_path_from_state:
        final_image_path = image_path_from_state
    elif image_np is not None:
        is_temp_file = True
        # Reserve a path, close the handle, then let OpenCV write to it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            final_image_path = tmp.name
    else:
        return "No image provided.", "Please upload or select an image.", gr.update(visible=False)

    instructions = {
        "sidebar": sidebar_prompt, "header": header_prompt,
        "navigation": navigation_prompt, "main content": main_content_prompt
    }

    try:
        if is_temp_file:
            cv2.imwrite(final_image_path, cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
        html_content, run_id = generate_html_for_demo(final_image_path, instructions)
    finally:
        # The temporary screenshot is only needed by the generator; remove it
        # on every path, including failures.
        if is_temp_file and os.path.exists(final_image_path):
            os.unlink(final_image_path)

    # The generator reports failures as a message string while still returning
    # a run_id, so the message prefix has to be checked as well.
    failure_prefixes = ("Error:", "An error occurred")
    reported_failure = isinstance(html_content, str) and html_content.startswith(failure_prefixes)
    if not run_id or reported_failure:
        message = html_content if reported_failure else f"Error: {html_content}"
        return "Generation failed.", message, gr.update(visible=False)

    base_dir = os.path.dirname(os.path.abspath(__file__))
    print(f"Processing HTML for run_id: {run_id}")
    html_content = _inline_local_images(html_content, base_dir, run_id)

    # Zip the run's output directory so it can be offered as a download.
    output_dir = os.path.join(base_dir, 'screencoder', 'data', 'output', run_id)
    packages_dir = os.path.join(base_dir, 'screencoder', 'data', 'packages')
    os.makedirs(packages_dir, exist_ok=True)
    shutil.make_archive(os.path.join(packages_dir, run_id), 'zip', output_dir)
    package_path = os.path.join(packages_dir, f'{run_id}.zip')
    package_url = f'/file={package_path}'

    initial_preview = render_preview(html_content, 1280, 600, 0.7)

    return initial_preview, html_content, gr.update(value=package_url, visible=True)
 
202
 
203
# UI definition: image input and prompts on the left, preview/code tabs on the right.
# NOTE(review): the nesting below was reconstructed from a view without
# indentation - confirm the layout against the running app.
with gr.Blocks(head=TAILWIND_SCRIPT, theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky")) as demo:
    gr.Markdown("# ScreenCoder: Screenshot to Code")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Provide an Image")
            active_image = gr.Image(type="numpy", height=400, value=examples_data[0][0])
            upload_button = gr.UploadButton("Click to Upload", file_types=["image"], variant="primary")

            gr.Markdown("### Step 2: Write Prompts (Optional)")
            with gr.Accordion("Component-specific Prompts", open=False):
                sidebar_prompt = gr.Textbox(label="Sidebar", placeholder="Instructions for the sidebar...")
                header_prompt = gr.Textbox(label="Header", placeholder="Instructions for the header...")
                navigation_prompt = gr.Textbox(label="Navigation", placeholder="Instructions for the navigation...")
                main_content_prompt = gr.Textbox(label="Main Content", placeholder="Instructions for the main content...")

            generate_btn = gr.Button("Generate HTML", variant="primary")

        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Preview"):
                    # Slider defaults mirror the values used for the initial
                    # preview in process_and_generate (1280 x 600 at 0.7).
                    with gr.Row():
                        scale_slider = gr.Slider(0.2, 1.5, value=0.7, step=0.05, label="Zoom")
                        width_slider = gr.Slider(400, 1920, value=1280, step=100, label="Canvas Width (px)")
                        height_slider = gr.Slider(300, 1080, value=600, step=50, label="Canvas Height (px)")

                    html_preview = gr.HTML(label="Rendered HTML", show_label=False, container=False)

                with gr.TabItem("Code"):
                    html_code_output = gr.Code(label="Generated HTML", language="html")

            # Hidden until a generation succeeds; process_and_generate stores
            # the package URL in this button's value.
            download_button = gr.Button("⬇️ Download Package", visible=False, variant="secondary")

    gr.Examples(
        examples=examples_data,
        fn=lambda *args: args[0],
        inputs=[gr.State(examples_data[0][0])],
        outputs=[active_image],
        cache_examples=False,
    )

    # Path of the selected example image. handle_upload resets it to None so
    # that process_and_generate uses the uploaded image instead.
    active_image_path_state = gr.State(examples_data[0][5])

    # NOTE(review): not referenced by any event handler visible here.
    def handle_example_click(img_path):
        return img_path, img_path

    # On page load, show the first example and record its path.
    demo.load(
        lambda: (examples_data[0][0], examples_data[0][5]), None, [active_image, active_image_path_state]
    )

    def handle_upload(uploaded_image_np):
        # Show the upload, clear the example path, and hide any stale download button.
        return uploaded_image_np, None, gr.update(visible=False)

    upload_button.upload(handle_upload, upload_button, [active_image, active_image_path_state, download_button])

    generate_btn.click(
        process_and_generate,
        [active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
        [html_preview, html_code_output, download_button],
        show_progress="full"
    )

    # Re-render the preview from the current code whenever any control changes.
    preview_controls = [scale_slider, width_slider, height_slider]
    for control in preview_controls:
        control.change(
            render_preview,
            [html_code_output, width_slider, height_slider, scale_slider],
            html_preview,
            show_progress=True
        )

    # Client-side only: the button's value (the package URL) is handed to a
    # temporary <a> element that is clicked to start the download.
    download_button.click(None, download_button, None, js= \
        "(url) => { const link = document.createElement('a'); link.href = url; link.download = ''; document.body.appendChild(link); link.click(); document.body.removeChild(link); }")
275
+
276
# Directories Gradio is allowed to serve through its file route.
# Only the data tree is exposed: listing the application root here would make
# every file next to this script (source code, configuration, key files)
# downloadable by any visitor.
base_dir = os.path.dirname(os.path.abspath(__file__))
allowed_paths = [
    os.path.join(base_dir, 'screencoder', 'data'),
    os.path.join(base_dir, 'screencoder', 'data', 'output'),
    os.path.join(base_dir, 'screencoder', 'data', 'packages')
]

print("Allowed paths for file serving:")
for path in allowed_paths:
    print(f"  - {path}")

if __name__ == "__main__":
    demo.launch(allowed_paths=allowed_paths)
screencoder/block_parsor.py CHANGED
@@ -16,11 +16,6 @@ PROMPT_MERGE = "Return the bounding boxes of the sidebar, main content, header,
16
  BBOX_TAG_START = "<bbox>"
17
  BBOX_TAG_END = "</bbox>"
18
 
19
- # PROMPT_sidebar = "框出网页中的sidebar的位置,请你只返回对应的bounding box。"
20
- # PROMPT_header = "框出网页中的header的位置,请你只返回对应的bounding box。"
21
- # PROMPT_navigation = "框出网页中的navigation的位置,请你只返回对应的bounding box。"
22
- # PROMPT_main_content = "框出网页中的main content的位置,请你只返回对应的bounding box。"
23
-
24
  def get_args():
25
  parser = argparse.ArgumentParser(description="Parses bounding boxes from an image using a vision model.")
26
  parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
 
16
  BBOX_TAG_START = "<bbox>"
17
  BBOX_TAG_END = "</bbox>"
18
 
 
 
 
 
 
19
  def get_args():
20
  parser = argparse.ArgumentParser(description="Parses bounding boxes from an image using a vision model.")
21
  parser.add_argument('--run_id', type=str, required=True, help='A unique identifier for the processing run.')
screencoder/html_generator.py CHANGED
@@ -18,27 +18,60 @@ def get_args():
18
 
19
  def get_prompt_dict(instructions):
20
  """Dynamically creates the prompt dictionary with the user's instructions."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  return {
22
- "sidebar": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the layout, icon style, size, and text information of all blocks need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {instructions["sidebar"]}. The following is the code for filling in:
23
- <div>
24
- your code here
25
- </div>,
26
- only return the code within the <div> and </div> tags""",
27
- "header": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {instructions["header"]}. The following is the code for filling in:
28
- <div>
29
- your code here
30
- </div>,
31
- only return the code within the <div> and </div> tags""",
32
- "navigation": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot. User instruction: {instructions["navigation"]}. The following is the code for filling in:
33
- <div>
34
- your code here
35
- </div>,
36
- only return the code within the <div> and </div> tags""",
37
- "main content": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {instructions["main content"]}. The following is the code for filling in:
38
- <div>
39
- your code here
40
- </div>,
41
- only return the code within the <div> and </div> tags""",
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
 
44
  def generate_code(bbox_tree, img_path, bot, instructions):
@@ -146,6 +179,66 @@ def generate_html(bbox_tree, output_file):
146
 
147
  with open(output_file, 'w') as f:
148
  f.write(bs4.BeautifulSoup(html_content, 'html.parser').prettify())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  def code_substitution(html_file, code_dict):
151
  """Substitutes the generated code into the HTML file."""
@@ -212,7 +305,7 @@ def main():
212
  exit(1)
213
 
214
  bot = Doubao(api_path, model="doubao-1.5-thinking-vision-pro-250428")
215
- code_dict = generate_code(root, img_path, bot, user_instruction)
216
  code_substitution(output_html_path, code_dict)
217
 
218
  print(f"HTML layout with generated content saved to {os.path.basename(output_html_path)}")
 
18
 
19
  def get_prompt_dict(instructions):
20
  """Dynamically creates the prompt dictionary with the user's instructions."""
21
+ # return {
22
+ # "sidebar": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the layout, icon style, size, and text information of all blocks need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {instructions["sidebar"]}. The following is the code for filling in:
23
+ # <div>
24
+ # your code here
25
+ # </div>,
26
+ # only return the code within the <div> and </div> tags""",
27
+ # "header": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {instructions["header"]}. The following is the code for filling in:
28
+ # <div>
29
+ # your code here
30
+ # </div>,
31
+ # only return the code within the <div> and </div> tags""",
32
+ # "navigation": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot. User instruction: {instructions["navigation"]}. The following is the code for filling in:
33
+ # <div>
34
+ # your code here
35
+ # </div>,
36
+ # only return the code within the <div> and </div> tags""",
37
+ # "main content": f"""This is a screenshot of a container. Please fill in a complete HTML and tail-wind CSS code to accurately reproduce the given container. Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. User instruction: {instructions["main content"]}. The following is the code for filling in:
38
+ # <div>
39
+ # your code here
40
+ # </div>,
41
+ # only return the code within the <div> and </div> tags""",
42
+ # }
43
  return {
44
+ "sidebar": f"""这是一个container的截图。这是用户给的额外要求:{instructions["sidebar"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。请保持图标一致,如果是大块图像,请使用"placeholder.png"替换,注意代码中的大小和位置信息。以下是供填写的代码:
45
+
46
+ <div>
47
+ your code here
48
+ </div>
49
+
50
+ 只需返回<div>和</div>标签内的代码""",
51
+
52
+ "header": f"""这是一个container的截图。这是用户给的额外要求:{instructions["header"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
53
+
54
+ <div>
55
+ your code here
56
+ </div>
57
+
58
+ 只需返回<div>和</div>标签内的代码""",
59
+
60
+ "navigation": f"""这是一个container的截图。这是用户给的额外要求:{instructions["navigation"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。图像请你直接使用原始截图中一致的图标。以下是供填写的代码:
61
+
62
+ <div>
63
+ your code here
64
+ </div>
65
+
66
+ 只需返回<div>和</div>标签内的代码""",
67
+
68
+ "main content": f"""这是一个container的截图。这是用户给的额外要求:{instructions["main content"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。截图中显示的图像务必使用"placeholder.png"替换,注意代码中其大小和位置信息,不需要识别其中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码:
69
+
70
+ <div>
71
+ your code here
72
+ </div>
73
+
74
+ 只需返回<div>和</div>标签内的代码"""
75
  }
76
 
77
  def generate_code(bbox_tree, img_path, bot, instructions):
 
179
 
180
  with open(output_file, 'w') as f:
181
  f.write(bs4.BeautifulSoup(html_content, 'html.parser').prettify())
182
def generate_code_parallel(bbox_tree, img_path, bot, instructions):
    """Generate code for every leaf of the bounding-box tree, one thread per leaf.

    Args:
        bbox_tree: Root node; a node without (or with empty) ``children`` is a leaf.
        img_path: Path of the screenshot each leaf's ``bbox`` is cropped from.
        bot: Client whose ``ask(prompt, image)`` returns the generated code.
        instructions: User instructions passed to ``get_prompt_dict``.

    Returns:
        A dictionary ``{node id: code}``. Failures are stored as HTML comments
        describing the problem rather than raised.
    """
    prompts = get_prompt_dict(instructions)
    results = {}

    def _worker(leaf, max_retries=3, retry_delay=2):
        """Crop the leaf's region and ask the bot for code, retrying on rate limits."""
        try:
            # Each thread opens its own image instance.
            with Image.open(img_path) as screenshot:
                region = screenshot.crop(leaf["bbox"])

                # Pick the prompt for this component type, or record why none applies.
                if "type" not in leaf:
                    print("Node type not found")
                    results[leaf["id"]] = "<!-- Node type not found -->"
                    return
                kind = leaf["type"]
                if kind not in prompts:
                    print(f"Unknown component type: {kind}")
                    results[leaf["id"]] = f"<!-- Unknown component type: {kind} -->"
                    return
                prompt = prompts[kind]

                for attempt in range(max_retries):
                    try:
                        results[leaf["id"]] = bot.ask(prompt, encode_image(region))
                        return
                    except Exception as err:
                        out_of_retries = attempt >= max_retries - 1
                        if out_of_retries or "rate_limit" not in str(err).lower():
                            print(f"Error generating code for node {leaf['id']}: {str(err)}")
                            results[leaf["id"]] = f"<!-- Error: {str(err)} -->"
                            return
                        print(f"Rate limit hit, retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                        time.sleep(retry_delay)
                        retry_delay *= 2  # Exponential backoff
        except Exception as err:
            print(f"Error processing image for node {leaf['id']}: {str(err)}")
            results[leaf["id"]] = f"<!-- Error: {str(err)} -->"

    # Walk the tree depth-first (children in their listed order) and start a
    # worker thread for each leaf.
    workers = []
    pending = [bbox_tree]
    while pending:
        current = pending.pop()
        if current.get("children"):
            pending.extend(reversed(current["children"]))
            continue
        worker = Thread(target=_worker, args=(current,))
        worker.start()
        workers.append(worker)

    # Wait for all threads to complete
    for worker in workers:
        worker.join()

    return results
242
 
243
  def code_substitution(html_file, code_dict):
244
  """Substitutes the generated code into the HTML file."""
 
305
  exit(1)
306
 
307
  bot = Doubao(api_path, model="doubao-1.5-thinking-vision-pro-250428")
308
+ code_dict = generate_code_parallel(root, img_path, bot, user_instruction)
309
  code_substitution(output_html_path, code_dict)
310
 
311
  print(f"HTML layout with generated content saved to {os.path.basename(output_html_path)}")
screencoder/image_box_detection.py CHANGED
@@ -15,18 +15,15 @@ async def extract_bboxes_from_html(html_path: Path):
15
 
16
  metrics = await page.evaluate("""
17
  () => {
18
- // 1. Find and store region containers and their bboxes
19
  const region_containers = Array.from(document.querySelectorAll('.box[id]'));
20
  const region_bboxes = region_containers.map(el => {
21
  const rect = el.getBoundingClientRect();
22
  return { id: el.id, x: rect.x, y: rect.y, w: rect.width, h: rect.height };
23
  });
24
 
25
- // 2. Find all potential placeholders on the page
26
  const placeholder_bboxes = [];
27
  let ph_id_counter = 0;
28
- //精准检测
29
- const all_potential_placeholders = document.querySelectorAll('.bg-gray-400');
30
 
31
  for (const el of all_potential_placeholders) {
32
  // Apply the same filters as before
@@ -47,7 +44,6 @@ async def extract_bboxes_from_html(html_path: Path):
47
  }
48
  }
49
 
50
- // Only include placeholders that are inside a detected region
51
  if (containing_region_id) {
52
  placeholder_bboxes.push({
53
  id: 'ph' + ph_id_counter++,
 
15
 
16
  metrics = await page.evaluate("""
17
  () => {
 
18
  const region_containers = Array.from(document.querySelectorAll('.box[id]'));
19
  const region_bboxes = region_containers.map(el => {
20
  const rect = el.getBoundingClientRect();
21
  return { id: el.id, x: rect.x, y: rect.y, w: rect.width, h: rect.height };
22
  });
23
 
 
24
  const placeholder_bboxes = [];
25
  let ph_id_counter = 0;
26
+ const all_potential_placeholders = document.querySelectorAll('img[src="placeholder.png"]');
 
27
 
28
  for (const el of all_potential_placeholders) {
29
  // Apply the same filters as before
 
44
  }
45
  }
46
 
 
47
  if (containing_region_id) {
48
  placeholder_bboxes.push({
49
  id: 'ph' + ph_id_counter++,
screencoder/image_replacer.py CHANGED
@@ -99,7 +99,7 @@ def main():
99
  soup = BeautifulSoup(html_content, 'html.parser')
100
 
101
  # 1. Find all placeholder elements by their class, in document order.
102
- placeholder_elements = soup.find_all(class_="bg-gray-400")
103
 
104
  # 2. Get the placeholder IDs from the mapping file in the correct, sorted order.
105
  def natural_sort_key(s):
@@ -115,32 +115,27 @@ def main():
115
 
116
  # 3. Check for count mismatches
117
  if len(placeholder_elements) != len(ordered_placeholder_ids):
118
- print(f"Warning: Mismatch in counts! Found {len(placeholder_elements)} gray boxes in HTML, but {len(ordered_placeholder_ids)} mappings.")
119
  else:
120
- print(f"Found {len(placeholder_elements)} gray boxes to replace.")
121
 
122
  # 4. Iterate through both lists, create a proper <img> tag, and replace the placeholder.
123
  for i, ph_element in enumerate(placeholder_elements):
124
  if i >= len(ordered_placeholder_ids):
125
- print(f"Warning: More gray boxes in HTML than mappings. Stopping at box {i+1}.")
126
  break
127
 
128
  ph_id = ordered_placeholder_ids[i]
 
129
  relative_img_path = f"{crop_dir.name}/{ph_id}.png"
130
 
131
- # --- Create a new <img> tag and replace the placeholder ---
132
-
133
- # a. Get all classes from the original placeholder to preserve styling.
134
- original_classes = ph_element.get('class', [])
135
- if 'bg-gray-400' in original_classes:
136
- original_classes.remove('bg-gray-400') # Remove the placeholder background
137
-
138
- # b. Create the new <img> tag
139
- img_tag = soup.new_tag("img", src=relative_img_path)
140
- img_tag['class'] = original_classes
141
 
142
- # c. Replace the placeholder with the new image tag.
143
- ph_element.replace_with(img_tag)
 
 
144
 
145
  # Save the modified HTML
146
  final_html_path.write_text(str(soup))
 
99
  soup = BeautifulSoup(html_content, 'html.parser')
100
 
101
  # 1. Find all placeholder <img> elements by their src attribute, in document order.
102
+ placeholder_elements = soup.find_all('img', src="placeholder.png")
103
 
104
  # 2. Get the placeholder IDs from the mapping file in the correct, sorted order.
105
  def natural_sort_key(s):
 
115
 
116
  # 3. Check for count mismatches
117
  if len(placeholder_elements) != len(ordered_placeholder_ids):
118
+ print(f"Warning: Mismatch in counts! Found {len(placeholder_elements)} placeholder images in HTML, but {len(ordered_placeholder_ids)} mappings.")
119
  else:
120
+ print(f"Found {len(placeholder_elements)} placeholder images to replace.")
121
 
122
  # 4. Iterate through both lists and point each placeholder <img> at its cropped image.
123
  for i, ph_element in enumerate(placeholder_elements):
124
  if i >= len(ordered_placeholder_ids):
125
+ print(f"Warning: More placeholder images in HTML than mappings. Stopping at image {i+1}.")
126
  break
127
 
128
  ph_id = ordered_placeholder_ids[i]
129
+ # Fix: Use the correct relative path from HTML file to image directory
130
  relative_img_path = f"{crop_dir.name}/{ph_id}.png"
131
 
132
+ # Debug: Print the path being used
133
+ print(f"Setting image path for {ph_id}: {relative_img_path}")
 
 
 
 
 
 
 
 
134
 
135
+ # --- Update the img tag's src attribute ---
136
+ # Since we're now working with img tags instead of div tags,
137
+ # we just need to update the src attribute
138
+ ph_element['src'] = relative_img_path
139
 
140
  # Save the modified HTML
141
  final_html_path.write_text(str(soup))
screencoder/main.py CHANGED
@@ -99,13 +99,15 @@ def generate_html_for_demo(image_path, instructions):
99
  with open(final_html_path, 'r', encoding='utf-8') as f:
100
  html_content = f.read()
101
  print(f"Successfully generated HTML for run_id: {run_id}")
102
- return html_content
103
  else:
104
- return f"Error: Final HTML file not found for run_id: {run_id}"
 
105
 
106
  except Exception as e:
 
107
  print(f"An error occurred during the workflow for run_id {run_id}: {e}")
108
- return f"An error occurred: {e}"
109
  finally:
110
  # 4. Cleanup: Remove temporary directories
111
  try:
 
99
  with open(final_html_path, 'r', encoding='utf-8') as f:
100
  html_content = f.read()
101
  print(f"Successfully generated HTML for run_id: {run_id}")
102
+ return html_content, run_id
103
  else:
104
+ error_msg = f"Error: Final HTML file not found for run_id: {run_id}"
105
+ return error_msg, run_id
106
 
107
  except Exception as e:
108
+ error_msg = f"An error occurred: {e}"
109
  print(f"An error occurred during the workflow for run_id {run_id}: {e}")
110
+ return error_msg, run_id
111
  finally:
112
  # 4. Cleanup: Remove temporary directories
113
  try:
screencoder/mapping.py CHANGED
@@ -319,8 +319,6 @@ def main():
319
 
320
  # Always generate the debug image if the source exists
321
  generate_debug_overlay(debug_src_path, all_uied_boxes, final_results, uied_shape, debug_overlay_path)
322
- print(f"Debug image written to {debug_overlay_path}")
323
-
324
  print(f"--- Mapping Complete for run_id: {run_id} ---")
325
 
326
  def get_args():
 
319
 
320
  # Always generate the debug image if the source exists
321
  generate_debug_overlay(debug_src_path, all_uied_boxes, final_results, uied_shape, debug_overlay_path)
 
 
322
  print(f"--- Mapping Complete for run_id: {run_id} ---")
323
 
324
  def get_args():