Spaces:
Running
on
Zero
Running
on
Zero
Updating description
Browse files
app.py
CHANGED
|
@@ -31,7 +31,7 @@ class Config:
|
|
| 31 |
|
| 32 |
# Bytelatent Specific
|
| 33 |
BLT_WEIGHTS_DIR: str = "hf-weights"
|
| 34 |
-
BLT_MAX_BYTES_FOR_DEMO: float = math.inf
|
| 35 |
|
| 36 |
# Gradio
|
| 37 |
DEFAULT_PROMPT: str = "Daenerys Targaryen is in Game of Thrones, a fantasy epic by George R.R. Martin."
|
|
@@ -39,7 +39,7 @@ class Config:
|
|
| 39 |
GRADIO_TITLE: str = "BLT's Entropy-based Patcher vs. Tokenizer Visualisation"
|
| 40 |
GRADIO_DESC: str = (
|
| 41 |
"Enter text to visualize its segmentation according to different methods:\n"
|
| 42 |
-
f"1. **Byte Latent Transformer (BLT):** Entropy-based patching plot and patched text (…).\n"
|
| 43 |
f"2. **Tiktoken (GPT-4):** Text segmented by `{TIKTOKEN_ENCODING_NAME}` tokens.\n"
|
| 44 |
f"3. **Llama 3:** Text segmented by the `{LLAMA3_MODEL_NAME}` tokenizer."
|
| 45 |
)
|
|
@@ -418,7 +418,7 @@ with gr.Blocks(theme=Config.GRADIO_THEME) as iface:
|
|
| 418 |
placeholder="Enter text here...",
|
| 419 |
# Max length is for UI input; Bytelatent truncation happens in backend
|
| 420 |
lines=5,
|
| 421 |
-
info=f"Note: Bytelatent processing is limited to ~{Config.BLT_MAX_BYTES_FOR_DEMO} bytes for this demo."
|
| 422 |
)
|
| 423 |
submit_button = gr.Button("Generate Visualizations", variant="primary")
|
| 424 |
status_output = gr.Textbox(label="Processing Status", interactive=False, lines=10) # More space for detailed status
|
|
|
|
| 31 |
|
| 32 |
# Bytelatent Specific
|
| 33 |
BLT_WEIGHTS_DIR: str = "hf-weights"
|
| 34 |
+
BLT_MAX_BYTES_FOR_DEMO: float = math.inf if torch.cuda.is_available() else 512.0
|
| 35 |
|
| 36 |
# Gradio
|
| 37 |
DEFAULT_PROMPT: str = "Daenerys Targaryen is in Game of Thrones, a fantasy epic by George R.R. Martin."
|
|
|
|
| 39 |
GRADIO_TITLE: str = "BLT's Entropy-based Patcher vs. Tokenizer Visualisation"
|
| 40 |
GRADIO_DESC: str = (
|
| 41 |
"Enter text to visualize its segmentation according to different methods:\n"
|
| 42 |
+
f"1. **Byte Latent Transformer (BLT):** Entropy-based patching plot and patched text (using `blt_main_entropy_100m_512w`).\n"
|
| 43 |
f"2. **Tiktoken (GPT-4):** Text segmented by `{TIKTOKEN_ENCODING_NAME}` tokens.\n"
|
| 44 |
f"3. **Llama 3:** Text segmented by the `{LLAMA3_MODEL_NAME}` tokenizer."
|
| 45 |
)
|
|
|
|
| 418 |
placeholder="Enter text here...",
|
| 419 |
# Max length is for UI input; Bytelatent truncation happens in backend
|
| 420 |
lines=5,
|
| 421 |
+
info="" if torch.cuda.is_available() else f"Note: Bytelatent processing is limited to ~{Config.BLT_MAX_BYTES_FOR_DEMO} bytes for this demo."
|
| 422 |
)
|
| 423 |
submit_button = gr.Button("Generate Visualizations", variant="primary")
|
| 424 |
status_output = gr.Textbox(label="Processing Status", interactive=False, lines=10) # More space for detailed status
|