Spaces:
Running
on
Zero
Running
on
Zero
fancyfeast
committed on
Commit
·
ccb684e
1
Parent(s):
301ae18
Tweak UI
Browse files- JoyCaptionLogo1.svg +10 -0
- app.py +23 -7
JoyCaptionLogo1.svg
ADDED
|
|
app.py
CHANGED
|
@@ -13,12 +13,14 @@ TITLE = """<style>
|
|
| 13 |
gap:16px; margin:4px 0 12px;}
|
| 14 |
.joy-header h1{margin:0; font-size:1.9rem; line-height:1.2;}
|
| 15 |
.joy-header p {margin:2px 0 0; font-size:0.9rem; color:#666;}
|
|
|
|
| 16 |
</style>
|
| 17 |
|
| 18 |
<div class="joy-header">
|
|
|
|
| 19 |
<div>
|
| 20 |
<h1>JoyCaption <span style="font-weight:400">Beta One</span></h1>
|
| 21 |
-
<p>Image-captioning model | build
|
| 22 |
</div>
|
| 23 |
</div>
|
| 24 |
<hr>"""
|
|
@@ -46,12 +48,12 @@ DESCRIPTION = """
|
|
| 46 |
<tr><td><strong>Straightforward</strong></td>
|
| 47 |
<td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
|
| 48 |
<tr><td><strong>Stable Diffusion Prompt</strong></td>
|
| 49 |
-
<td>Reverse-engineers a prompt that could have produced the image in a SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
| 50 |
<tr><td><strong>MidJourney</strong></td>
|
| 51 |
-
<td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
| 52 |
<tr><td><strong>Danbooru tag list</strong></td>
|
| 53 |
<td>Comma-separated tags strictly following Danbooru conventions
|
| 54 |
-
(artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
| 55 |
<tr><td><strong>e621 tag list</strong></td>
|
| 56 |
<td>Alphabetical, namespaced tags in e621 style – includes species/meta
|
| 57 |
tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
|
@@ -59,7 +61,7 @@ DESCRIPTION = """
|
|
| 59 |
<td>Rule34 style alphabetical tag dump; artist/copyright/character
|
| 60 |
prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
| 61 |
<tr><td><strong>Booru-like tag list</strong></td>
|
| 62 |
-
<td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3
|
| 63 |
<tr><td><strong>Art Critic</strong></td>
|
| 64 |
<td>Paragraph of art-historical commentary: composition, symbolism, style,
|
| 65 |
lighting, movement, etc.</td></tr>
|
|
@@ -163,6 +165,8 @@ CAPTION_TYPE_MAP = {
|
|
| 163 |
"Write a {length} caption for this image as if it were being used for a social media post.",
|
| 164 |
],
|
| 165 |
}
|
|
|
|
|
|
|
| 166 |
|
| 167 |
|
| 168 |
|
|
@@ -194,6 +198,11 @@ def build_prompt(caption_type: str, caption_length: str | int, extra_options: li
|
|
| 194 |
)
|
| 195 |
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
@spaces.GPU()
|
| 198 |
@torch.no_grad()
|
| 199 |
def chat_joycaption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
|
|
@@ -271,7 +280,7 @@ with gr.Blocks() as demo:
|
|
| 271 |
with gr.Accordion("Extra Options", open=False):
|
| 272 |
extra_options = gr.CheckboxGroup(
|
| 273 |
choices=[
|
| 274 |
-
|
| 275 |
"Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
|
| 276 |
"Include information about lighting.",
|
| 277 |
"Include information about camera angle.",
|
|
@@ -302,7 +311,7 @@ with gr.Blocks() as demo:
|
|
| 302 |
label="Select one or more",
|
| 303 |
)
|
| 304 |
|
| 305 |
-
name_input = gr.Textbox(label="Person / Character Name")
|
| 306 |
|
| 307 |
with gr.Accordion("Generation settings", open=False):
|
| 308 |
temperature_slider = gr.Slider(
|
|
@@ -325,6 +334,13 @@ with gr.Blocks() as demo:
|
|
| 325 |
with gr.Column():
|
| 326 |
prompt_box = gr.Textbox(lines=4, label="Prompt", interactive=True)
|
| 327 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
# Auto-update prompt box whenever any of the inputs change
|
| 329 |
for ctrl in (caption_type, caption_length, extra_options, name_input):
|
| 330 |
ctrl.change(
|
|
|
|
| 13 |
gap:16px; margin:4px 0 12px;}
|
| 14 |
.joy-header h1{margin:0; font-size:1.9rem; line-height:1.2;}
|
| 15 |
.joy-header p {margin:2px 0 0; font-size:0.9rem; color:#666;}
|
| 16 |
+
.joy-header img{height:56px;}
|
| 17 |
</style>
|
| 18 |
|
| 19 |
<div class="joy-header">
|
| 20 |
+
<img src="logo.svg" alt="JoyCaption logo">
|
| 21 |
<div>
|
| 22 |
<h1>JoyCaption <span style="font-weight:400">Beta One</span></h1>
|
| 23 |
+
<p>Image-captioning model | build mb3500zp</p>
|
| 24 |
</div>
|
| 25 |
</div>
|
| 26 |
<hr>"""
|
|
|
|
| 48 |
<tr><td><strong>Straightforward</strong></td>
|
| 49 |
<td>Objective, no fluff, and more succinct than Descriptive.</td></tr>
|
| 50 |
<tr><td><strong>Stable Diffusion Prompt</strong></td>
|
| 51 |
+
<td>Reverse-engineers a prompt that could have produced the image in a SD/T2I model.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
|
| 52 |
<tr><td><strong>MidJourney</strong></td>
|
| 53 |
+
<td>Same idea as above but tuned to MidJourney’s prompt style.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time.</em></td></tr>
|
| 54 |
<tr><td><strong>Danbooru tag list</strong></td>
|
| 55 |
<td>Comma-separated tags strictly following Danbooru conventions
|
| 56 |
+
(artist:, copyright:, etc.). Lower-case underscores only.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time..</em></td></tr>
|
| 57 |
<tr><td><strong>e621 tag list</strong></td>
|
| 58 |
<td>Alphabetical, namespaced tags in e621 style – includes species/meta
|
| 59 |
tags when relevant.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
|
|
|
| 61 |
<td>Rule34 style alphabetical tag dump; artist/copyright/character
|
| 62 |
prefixes first.<br><em>⚠︎ Experimental – can glitch ≈ 3 %.</em></td></tr>
|
| 63 |
<tr><td><strong>Booru-like tag list</strong></td>
|
| 64 |
+
<td>Looser tag list when you want labels but not a specific Booru format.<br><em>⚠︎ Experimental – can glitch ≈ 3% of the time..</em></td></tr>
|
| 65 |
<tr><td><strong>Art Critic</strong></td>
|
| 66 |
<td>Paragraph of art-historical commentary: composition, symbolism, style,
|
| 67 |
lighting, movement, etc.</td></tr>
|
|
|
|
| 165 |
"Write a {length} caption for this image as if it were being used for a social media post.",
|
| 166 |
],
|
| 167 |
}
|
| 168 |
+
# Label of the "Extra Options" checkbox that asks the model to refer to the
# subject by name. It is also the exact string toggle_name_box looks for in the
# selected options. The {name} placeholder is presumably substituted when the
# prompt is built -- TODO confirm against build_prompt.
NAME_OPTION = "If there is a person/character in the image you must refer to them as {name}."
|
| 169 |
+
|
| 170 |
|
| 171 |
|
| 172 |
|
|
|
|
| 198 |
)
|
| 199 |
|
| 200 |
|
| 201 |
+
def toggle_name_box(selected_options: list[str]):
    """Build a Gradio update that toggles visibility based on NAME_OPTION.

    The returned update has ``visible=True`` when ``NAME_OPTION`` is among
    ``selected_options`` and ``visible=False`` otherwise.
    """
    name_option_selected = NAME_OPTION in selected_options
    return gr.update(visible=name_option_selected)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
@spaces.GPU()
|
| 207 |
@torch.no_grad()
|
| 208 |
def chat_joycaption(input_image: Image.Image, prompt: str, temperature: float, top_p: float, max_new_tokens: int, log_prompt: bool) -> Generator[str, None, None]:
|
|
|
|
| 280 |
with gr.Accordion("Extra Options", open=False):
|
| 281 |
extra_options = gr.CheckboxGroup(
|
| 282 |
choices=[
|
| 283 |
+
NAME_OPTION,
|
| 284 |
"Do NOT include information about people/characters that cannot be changed (like ethnicity, gender, etc), but do still include changeable attributes (like hair style).",
|
| 285 |
"Include information about lighting.",
|
| 286 |
"Include information about camera angle.",
|
|
|
|
| 311 |
label="Select one or more",
|
| 312 |
)
|
| 313 |
|
| 314 |
+
name_input = gr.Textbox(label="Person / Character Name", visible=False)
|
| 315 |
|
| 316 |
with gr.Accordion("Generation settings", open=False):
|
| 317 |
temperature_slider = gr.Slider(
|
|
|
|
| 334 |
with gr.Column():
|
| 335 |
prompt_box = gr.Textbox(lines=4, label="Prompt", interactive=True)
|
| 336 |
|
| 337 |
+
# Show the name input box only when the specific option is selected
|
| 338 |
+
extra_options.change(
|
| 339 |
+
toggle_name_box,
|
| 340 |
+
inputs=extra_options,
|
| 341 |
+
outputs=name_input,
|
| 342 |
+
)
|
| 343 |
+
|
| 344 |
# Auto-update prompt box whenever any of the inputs change
|
| 345 |
for ctrl in (caption_type, caption_length, extra_options, name_input):
|
| 346 |
ctrl.change(
|