AryanRathod3097 committed
Commit d266dc0 · verified · 1 Parent(s): 14f8553

Update app.py

Files changed (1)
app.py +85 -84
app.py CHANGED
@@ -1,95 +1,96 @@
- """
- Tiny-CodeNyx – 160 MB distilled general-knowledge code model
- Fine-tuned on 5k Q&A snippets in < 2 min
- """
- import os, json, torch, gradio as gr
- from datasets import load_dataset
- from transformers import (AutoTokenizer, AutoModelForCausalLM,
-                           Trainer, TrainingArguments, DataCollatorForLanguageModeling)
- from peft import LoraConfig, get_peft_model
-
- MODEL_ID = "distilgpt2"
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
- tokenizer.pad_token = tokenizer.eos_token
- model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
-
- # ---------- 1. 5k-shot general-knowledge dataset ----------
- def build_mini_dataset():
-     """Return a tiny JSON that mixes code & general facts."""
-     data = [
-         {"text": "Q: Write a FastAPI route that returns current UTC time.\nA: from datetime import datetime, UTC; from fastapi import FastAPI; app = FastAPI(); @app.get('/time'); def get_time(): return {'utc': datetime.now(UTC).isoformat()}"},
-         {"text": "Q: Capital of France?\nA: Paris"},
-         {"text": "Q: Print Fibonacci sequence in Python.\nA: a,b=0,1;[print(a)or(a:=b,b:=a+b)for _ in range(10)]"},
-         {"text": "Q: What is 2+2?\nA: 4"},
-         {"text": "Q: Explain list comprehension.\nA: [expr for item in iterable if condition]"},
-         {"text": "Q: Who wrote Romeo and Juliet?\nA: William Shakespeare"},
-         {"text": "Q: How to reverse a string in Python?\nA: s[::-1]"},
-         {"text": "Q: Largest planet?\nA: Jupiter"},
-         {"text": "Q: SQL to create users table.\nA: CREATE TABLE users(id INT PRIMARY KEY, name VARCHAR(100));"},
-         {"text": "Q: Speed of light in vacuum?\nA: 299 792 458 m/s"},
-     ]
-     # replicate to 5 000 lines
-     data = data * 500
-     with open("mini.json", "w") as f:
-         for d in data:
-             f.write(json.dumps(d) + "\n")
-     return load_dataset("json", data_files="mini.json")["train"]
-
- dataset = build_mini_dataset()
-
- # ---------- 2. Tokenize ----------
- def tokenize(examples):
-     return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)
-
- dataset = dataset.map(tokenize, batched=True)
- data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
-
- # ---------- 3. LoRA fine-tune ----------
- lora_config = LoraConfig(
-     r=8, lora_alpha=32, lora_dropout=0.1, target_modules=["c_attn"]
- )
- model = get_peft_model(model, lora_config)
-
- training_args = TrainingArguments(
-     output_dir="./tiny-codenyx",
-     per_device_train_batch_size=4,
-     num_train_epochs=1,
-     logging_steps=50,
-     fp16=True,
-     save_steps=500,
-     save_total_limit=1,
-     report_to=None,
- )
- trainer = Trainer(
-     model=model,
-     args=training_args,
-     train_dataset=dataset,
-     data_collator=data_collator,
- )
- trainer.train()
- trainer.save_model("./tiny-codenyx")
-
- # ---------- 4. Gradio chat ----------
- model.eval()
- def chat_fn(message, history):
-     prompt = "\n".join([f"Q: {h[0]}\nA: {h[1]}" for h in history])
-     prompt += f"\nQ: {message}\nA:"
-     inputs = tokenizer.encode(prompt, return_tensors="pt")
-     with torch.no_grad():
-         outputs = model.generate(
-             inputs,
-             max_new_tokens=128,
-             temperature=0.7,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id,
-         )
-     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     answer = answer.split("A:")[-1].strip()
-     return answer
-
- gr.ChatInterface(
-     fn=chat_fn,
-     title="Tiny-CodeNyx – 160 MB General-Knowledge Bot",
-     description="Ask anything code or general knowledge; model trained on 5k Q&A.",
-     theme="soft"
- ).queue().launch(server_name="0.0.0.0", server_port=7860, share=True)
 
+ """
+ RealCanvas-MJ4K
+ A 16-GB-friendly Gradio Space that
+ 1. streams the prompt dataset MohamedRashad/midjourney-detailed-prompts
+ 2. generates realistic images using SDXL-Lightning
+ 3. optionally displays random images from opendiffusionai/cc12m-4mp-realistic
+ """
+
+ import gradio as gr
+ import torch, os, random, json, requests
+ from io import BytesIO
+ from PIL import Image
+ from datasets import load_dataset
+ from huggingface_hub import hf_hub_download
+ from diffusers import StableDiffusionXLPipeline, EulerDiscreteScheduler
+
+ # -------------------------------------------------
+ # 1. Load the prompt dataset (lazy streaming)
+ # -------------------------------------------------
+ print("🔍 Streaming prompt dataset …")
+ ds_prompts = load_dataset(
+     "MohamedRashad/midjourney-detailed-prompts",
+     split="train",
+     streaming=True
+ )
+ prompt_pool = list(ds_prompts.shuffle(seed=42).take(500_000))  # holds up to 500k prompts in RAM
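+ # shuffle() on a streaming dataset mixes rows through a fixed-size buffer
+ # rather than doing a global shuffle, and take() stops iteration early, so
+ # the full dataset is never downloaded.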
+
+ # -------------------------------------------------
+ # 2. Load SDXL-Lightning (fp16, 4-step)
+ # -------------------------------------------------
+ MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"
+ print("🤖 Loading SDXL-Lightning …")
+ pipe = StableDiffusionXLPipeline.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.float16,
+     variant="fp16",
+     use_safetensors=True
+ )
+ # SDXL-Lightning is distilled for an Euler sampler with trailing timesteps
+ pipe.scheduler = EulerDiscreteScheduler.from_config(
+     pipe.scheduler.config, timestep_spacing="trailing"
+ )
+ # lightning LoRA
+ lora_path = hf_hub_download(
+     repo_id="ByteDance/SDXL-Lightning",
+     filename="sdxl_lightning_4step_lora.safetensors"
+ )
+ pipe.load_lora_weights(lora_path)
+ pipe = pipe.to("cuda") if torch.cuda.is_available() else pipe.to("cpu")
+ pipe.enable_attention_slicing()
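+ # The 4-step Lightning LoRA is distilled to sample at guidance_scale near 0,
+ # which is why the UI below defaults to steps=4 and guidance 0.0.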
+
+ # -------------------------------------------------
+ # 3. Random CC12M-4MP image helper (optional demo)
+ # -------------------------------------------------
+ print("📸 Streaming CC12M-4MP-realistic …")
+ ds_images = load_dataset(
+     "opendiffusionai/cc12m-4mp-realistic",
+     split="train",
+     streaming=True
+ )
+ img_pool = list(ds_images.shuffle(seed=42).take(1_000))  # 1 000 samples held in RAM
+
+ def random_cc12m_image():
+     sample = random.choice(img_pool)
+     return sample["image"].resize((512, 512))
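+ # Note: 1 000 decoded 4-MP images can occupy several GB once accessed as PIL
+ # objects; lower the take() count above if the Space hits its memory limit.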
+
+ # -------------------------------------------------
+ # 4. Gradio UI
+ # -------------------------------------------------
+ def generate(prompt: str, steps: int = 4, guidance: float = 0.0):
+     if not prompt.strip():
+         prompt = random.choice(prompt_pool)["prompt"]
+     image = pipe(
+         prompt,
+         num_inference_steps=steps,
+         guidance_scale=guidance
+     ).images[0]
+     return image.resize((768, 768))
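+     # SDXL generates 1024x1024 by default; resizing to 768x768 keeps the
+     # response payload small.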
+
+ with gr.Blocks(title="RealCanvas-MJ4K") as demo:
+     gr.Markdown("# 🎨 RealCanvas-MJ4K | Midjourney-level realism under 16 GB")
+     with gr.Row():
+         prompt_in = gr.Textbox(
+             label="Prompt (leave empty for random Midjourney-style prompt)",
+             lines=2
+         )
+     with gr.Row():
+         steps = gr.Slider(1, 8, value=4, step=1, label="Inference steps (SDXL-Lightning)")
+         guidance = gr.Slider(0.0, 2.0, value=0.0, step=0.1, label="Guidance scale")
+     btn = gr.Button("Generate", variant="primary")
+     gallery = gr.Image(type="pil", label="Generated image")
+     with gr.Accordion("📸 Random CC12M-4MP sample", open=False):
+         cc_btn = gr.Button("Show random CC12M-4MP image")
+         cc_out = gr.Image(type="pil", label="Real photo from dataset")
+
+     btn.click(generate, [prompt_in, steps, guidance], gallery)
+     cc_btn.click(random_cc12m_image, outputs=cc_out)
+
+ demo.queue(max_size=8).launch()
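
The new app.py indexes dataset rows by literal column names ("prompt" for the Midjourney prompts, "image" for the CC12M photos). A quick streaming sketch like the one below, run locally with the same dataset ids, confirms those schemas before deploying:

# sanity_check.py - minimal sketch: peek at one streamed row per dataset to
# confirm the column names ("prompt", "image") that app.py assumes.
from datasets import load_dataset

for repo in ("MohamedRashad/midjourney-detailed-prompts",
             "opendiffusionai/cc12m-4mp-realistic"):
    row = next(iter(load_dataset(repo, split="train", streaming=True)))
    print(repo, "->", sorted(row.keys()))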