akhaliq (HF Staff) committed
Commit 38d659c · 1 Parent(s): 328b6bb

add glm 4.5

Files changed (1)
  1. app.py +127 -35
app.py CHANGED
@@ -48,7 +48,16 @@ DIVIDER = "======="
REPLACE_END = ">>>>>>> REPLACE"

# Configuration
- HTML_SYSTEM_PROMPT = """ONLY USE HTML, CSS AND JAVASCRIPT. If you want to use ICON make sure to import the library first. Try to create the best UI possible by using only HTML, CSS and JAVASCRIPT. MAKE IT RESPONSIVE USING MODERN CSS. Use as much as you can modern CSS for the styling, if you can't do something with modern CSS, then use custom CSS. Also, try to elaborate as much as you can, to create something unique. ALWAYS GIVE THE RESPONSE INTO A SINGLE HTML FILE
+ HTML_SYSTEM_PROMPT = """You are an expert front-end developer.
+
+ Output a COMPLETE, STANDALONE HTML document that renders directly in a browser. Requirements:
+ - Include <!DOCTYPE html>, <html>, <head>, and <body> with proper nesting
+ - Include all required <link> and <script> tags for any libraries you use
+ - Do NOT escape characters (no \\n, \\t, or escaped quotes). Output raw HTML/JS/CSS.
+ - If you use React or Tailwind, include correct CDN tags
+ - Keep everything in ONE file; inline CSS/JS as needed
+
+ For website redesign tasks:

For website redesign tasks:
- Use the provided original HTML code as the starting point for redesign
@@ -67,6 +76,19 @@ Always respond with code that can be executed or rendered directly.

Always output only the HTML code inside a ```html ... ``` code block, and do not include any explanations or extra text. Do NOT add the language name at the top of the code output."""

+ # Stricter prompt for GLM-4.5V to ensure a complete, runnable HTML document with no escaped characters
+ GLM45V_HTML_SYSTEM_PROMPT = """You are an expert front-end developer.
+
+ Output a COMPLETE, STANDALONE HTML document that renders directly in a browser. Requirements:
+ - Include <!DOCTYPE html>, <html>, <head>, and <body> with proper nesting
+ - Include all required <link> and <script> tags for any libraries you use
+ - Do NOT escape characters (no \\n, \\t, or escaped quotes). Output raw HTML/JS/CSS.
+ - If you use React or Tailwind, include correct CDN tags
+ - Keep everything in ONE file; inline CSS/JS as needed
+
+ Return ONLY the code inside a single ```html ... ``` code block. No additional text before or after.
+ """
+
TRANSFORMERS_JS_SYSTEM_PROMPT = """You are an expert web developer creating a transformers.js application. You will generate THREE separate files: index.html, index.js, and style.css.

IMPORTANT: You MUST output ALL THREE files in the following format:
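Note: the "Do NOT escape characters" rule in the new prompts addresses responses that arrive with literal \n and \t sequences instead of real newlines, which would otherwise show up as visible text in the preview. The GLM-4.5V handler added later in this commit keeps a defensive unescape as a fallback; a small illustration of that fallback (not part of app.py) is:

```python
# Illustration only: unescape literal "\n"/"\t" sequences, mirroring the
# fallback used in the GLM-4.5V handler further down in this diff.
raw = "<!DOCTYPE html>\\n<html>\\n<body>Hello</body>\\n</html>"
fixed = raw.replace("\\n", "\n").replace("\\t", "\t")
print(fixed)
```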
@@ -207,9 +229,16 @@ Always output only the three code blocks as shown above, and do not include any
GENERIC_SYSTEM_PROMPT = """You are an expert {language} developer. Write clean, idiomatic, and runnable {language} code for the user's request. If possible, include comments and best practices. Output ONLY the code inside a ``` code block, and do not include any explanations or extra text. If the user provides a file or other context, use it as a reference. If the code is for a script or app, make it as self-contained as possible. Do NOT add the language name at the top of the code output."""

# System prompt with search capability
- HTML_SYSTEM_PROMPT_WITH_SEARCH = """ONLY USE HTML, CSS AND JAVASCRIPT. If you want to use ICON make sure to import the library first. Try to create the best UI possible by using only HTML, CSS and JAVASCRIPT. MAKE IT RESPONSIVE USING MODERN CSS. Use as much as you can modern CSS for the styling, if you can't do something with modern CSS, then use custom CSS. Also, try to elaborate as much as you can, to create something unique. ALWAYS GIVE THE RESPONSE INTO A SINGLE HTML FILE
+ HTML_SYSTEM_PROMPT_WITH_SEARCH = """You are an expert front-end developer. You have access to real-time web search.

- You have access to real-time web search. When needed, use web search to find the latest information, best practices, or specific technologies.
+ Output a COMPLETE, STANDALONE HTML document that renders directly in a browser. Requirements:
+ - Include <!DOCTYPE html>, <html>, <head>, and <body> with proper nesting
+ - Include all required <link> and <script> tags for any libraries you use
+ - Do NOT escape characters (no \\n, \\t, or escaped quotes). Output raw HTML/JS/CSS.
+ - If you use React or Tailwind, include correct CDN tags
+ - Keep everything in ONE file; inline CSS/JS as needed
+
+ Use web search when needed to find the latest best practices or correct CDN links.

For website redesign tasks:
- Use the provided original HTML code as the starting point for redesign
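The commit does not show where HTML_SYSTEM_PROMPT versus HTML_SYSTEM_PROMPT_WITH_SEARCH is chosen; presumably a web-search toggle elsewhere in app.py selects between them, roughly along these lines (the enable_search name is illustrative, not taken from this diff):

```python
# Hypothetical sketch of prompt selection; the real toggle lives outside this diff.
def pick_html_system_prompt(enable_search: bool) -> str:
    return HTML_SYSTEM_PROMPT_WITH_SEARCH if enable_search else HTML_SYSTEM_PROMPT
```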
@@ -421,6 +450,11 @@ AVAILABLE_MODELS = [
"id": "zai-org/GLM-4.5",
"description": "GLM-4.5 model with thinking capabilities for advanced code generation"
},
+ {
+ "name": "GLM-4.5V",
+ "id": "zai-org/GLM-4.5V",
+ "description": "GLM-4.5V multimodal model with image understanding for code generation"
+ },
{
"name": "GLM-4.1V-9B-Thinking",
"id": "THUDM/GLM-4.1V-9B-Thinking",
@@ -1006,7 +1040,8 @@ def update_image_input_visibility(model):
"""Update image input visibility based on selected model"""
is_ernie_vl = model.get("id") == "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"
is_glm_vl = model.get("id") == "THUDM/GLM-4.1V-9B-Thinking"
- return gr.update(visible=is_ernie_vl or is_glm_vl)
+ is_glm_45v = model.get("id") == "zai-org/GLM-4.5V"
+ return gr.update(visible=is_ernie_vl or is_glm_vl or is_glm_45v)

def process_image_for_model(image):
"""Convert image to base64 for model input"""
@@ -1680,37 +1715,9 @@ Please use the search results above to help create the requested application wit
return enhanced_query

def send_to_sandbox(code):
- # Add a wrapper to inject necessary permissions and ensure full HTML
- wrapped_code = f"""
- <!DOCTYPE html>
- <html>
- <head>
- <meta charset=\"UTF-8\">
- <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">
- <script>
- // Safe localStorage polyfill
- const safeStorage = {{
- _data: {{}},
- getItem: function(key) {{ return this._data[key] || null; }},
- setItem: function(key, value) {{ this._data[key] = value; }},
- removeItem: function(key) {{ delete this._data[key]; }},
- clear: function() {{ this._data = {{}}; }}
- }};
- Object.defineProperty(window, 'localStorage', {{
- value: safeStorage,
- writable: false
- }});
- window.onerror = function(message, source, lineno, colno, error) {{
- console.error('Error:', message);
- }};
- </script>
- </head>
- <body>
- {code}
- </body>
- </html>
- """
- encoded_html = base64.b64encode(wrapped_code.encode('utf-8')).decode('utf-8')
+ """Render HTML in a sandboxed iframe. Assumes full HTML is provided by prompts."""
+ html_doc = (code or "").strip()
+ encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
iframe = f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture"></iframe>'
return iframe
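The rewritten send_to_sandbox drops the old injected wrapper (localStorage polyfill and window.onerror handler) and, per its new docstring, trusts the stricter prompts to return a complete document, so the code is just base64-encoded into a data: URI for the sandboxed iframe. A standalone illustration of that round trip (not part of app.py):

```python
# Demonstrates the data: URI encoding now used by send_to_sandbox.
import base64

html_doc = "<!DOCTYPE html><html><body><h1>hello</h1></body></html>"
encoded = base64.b64encode(html_doc.encode("utf-8")).decode("utf-8")
data_uri = f"data:text/html;charset=utf-8;base64,{encoded}"

# Decoding the payload recovers the original document byte-for-byte.
assert base64.b64decode(encoded).decode("utf-8") == html_doc
print(data_uri[:64] + "...")
```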
@@ -2540,6 +2547,91 @@ This will help me create a better design for you."""
}
return

+ # Handle GLM-4.5V (multimodal vision)
+ if _current_model["id"] == "zai-org/GLM-4.5V":
+ # Build structured messages with a strong system prompt to enforce full HTML output
+ structured = [
+ {"role": "system", "content": GLM45V_HTML_SYSTEM_PROMPT}
+ ]
+ if image is not None:
+ user_msg = {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": enhanced_query},
+ ],
+ }
+ try:
+ import io, base64
+ from PIL import Image
+ import numpy as np
+ if isinstance(image, np.ndarray):
+ image = Image.fromarray(image)
+ buf = io.BytesIO()
+ image.save(buf, format="PNG")
+ b64 = base64.b64encode(buf.getvalue()).decode()
+ user_msg["content"].append({
+ "type": "image_url",
+ "image_url": {"url": f"data:image/png;base64,{b64}"}
+ })
+ structured.append(user_msg)
+ except Exception:
+ structured.append({"role": "user", "content": enhanced_query})
+ else:
+ structured.append({"role": "user", "content": enhanced_query})
+
+ try:
+ client = InferenceClient(
+ provider="auto",
+ api_key=os.environ["HF_TOKEN"],
+ bill_to="huggingface",
+ )
+ stream = client.chat.completions.create(
+ model="zai-org/GLM-4.5V",
+ messages=structured,
+ stream=True,
+ )
+ content = ""
+ for chunk in stream:
+ if getattr(chunk, "choices", None) and chunk.choices and getattr(chunk.choices[0], "delta", None) and getattr(chunk.choices[0].delta, "content", None):
+ content += chunk.choices[0].delta.content
+ clean_code = remove_code_block(content)
+ # Ensure escaped newlines/tabs from model are rendered correctly
+ if "\\n" in clean_code:
+ clean_code = clean_code.replace("\\n", "\n")
+ if "\\t" in clean_code:
+ clean_code = clean_code.replace("\\t", "\t")
+ preview_val = None
+ if language == "html":
+ preview_val = send_to_sandbox(clean_code)
+ elif language == "python" and is_streamlit_code(clean_code):
+ preview_val = send_streamlit_to_stlite(clean_code)
+ yield {
+ code_output: gr.update(value=clean_code, language=get_gradio_language(language)),
+ history_output: history_to_chatbot_messages(_history),
+ sandbox: preview_val or "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML or Streamlit-in-Python.</div>",
+ }
+ except Exception as e:
+ content = f"Error with GLM-4.5V: {str(e)}\n\nPlease make sure HF_TOKEN environment variable is set."
+
+ clean_code = remove_code_block(content)
+ if "\\n" in clean_code:
+ clean_code = clean_code.replace("\\n", "\n")
+ if "\\t" in clean_code:
+ clean_code = clean_code.replace("\\t", "\t")
+ _history.append([query, clean_code])
+ preview_val = None
+ if language == "html":
+ preview_val = send_to_sandbox(clean_code)
+ elif language == "python" and is_streamlit_code(clean_code):
+ preview_val = send_streamlit_to_stlite(clean_code)
+ yield {
+ code_output: clean_code,
+ history: _history,
+ sandbox: preview_val or "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML or Streamlit-in-Python.</div>",
+ history_output: history_to_chatbot_messages(_history),
+ }
+ return
+
# Use dynamic client based on selected model (for non-GLM-4.5 models)
client = get_inference_client(_current_model["id"], provider)
 
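One detail of the new GLM-4.5V branch: the streamed response is post-processed by remove_code_block before being unescaped and previewed. That helper is defined elsewhere in app.py and is not shown in this commit; a plausible minimal version, offered only as an assumption about its behavior, is:

```python
# Assumed behavior of remove_code_block (not the actual app.py implementation):
# return the body of the first fenced code block, or the raw text if none is found.
import re

def remove_code_block(text: str) -> str:
    match = re.search(r"```(?:[a-zA-Z]+)?\n(.*?)```", text, re.DOTALL)
    return match.group(1).strip() if match else text.strip()

print(remove_code_block("```html\n<!DOCTYPE html><html><body>Hi</body></html>\n```"))
```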