store-user-feedback

Sleeping

App Files Files Community

WillHeld committed on May 19

Commit

5cd2aa6

verified ·

1 Parent(s): f5ef4f1

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -191

app.py CHANGED Viewed

@@ -1,218 +1,225 @@
-import spaces
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-import gradio as gr
-from threading import Thread
-import os
 import json
-import uuid
-from datasets import Dataset
-from huggingface_hub import HfApi, login
 import time
-# Install required packages if not present
 from gradio_modal import Modal
-import huggingface_hub
-import datasets
-# Model setup
 checkpoint = "marin-community/marin-8b-instruct"
-device = "cuda"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
-# Constants for dataset
-DATASET_REPO = "WillHeld/model-feedback"  # Replace with your username
-DATASET_PATH = "./feedback_data"  # Local path to store feedback
-DATASET_FILENAME = "feedback.jsonl"  # Filename for feedback data
-# Ensure feedback directory exists
-os.makedirs(DATASET_PATH, exist_ok=True)
-# Feedback storage functions
-def save_feedback_locally(conversation, satisfaction, feedback_text):
-    """Save feedback to a local JSONL file"""
-    # Create a unique ID for this feedback entry
-    feedback_id = str(uuid.uuid4())
-    # Create a timestamp
-    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
-    # Prepare the feedback data
-    feedback_data = {
-        "id": feedback_id,
-        "timestamp": timestamp,
         "conversation": conversation,
         "satisfaction": satisfaction,
-        "feedback": feedback_text
     }
-    # Save to local file
-    feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
-    with open(feedback_file, "a") as f:
-        f.write(json.dumps(feedback_data) + "\n")
-    return feedback_id
-def push_feedback_to_hub(hf_token=None):
-    """Push the local feedback data to HuggingFace as a dataset"""
-    # Check if we have a token
-    if hf_token is None:
-        # Try to get token from environment variable
-        hf_token = os.environ.get("HF_TOKEN")
-        if hf_token is None:
-            print("No HuggingFace token provided. Cannot push to Hub.")
-            return False
     try:
-        # Login to HuggingFace
-        login(token=hf_token)
-        # Check if we have data to push
-        feedback_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
-        if not os.path.exists(feedback_file):
-            print("No feedback data to push.")
-            return False
-        # Load data from the JSONL file
-        with open(feedback_file, "r") as f:
-            feedback_data = [json.loads(line) for line in f]
-        # Create a dataset from the feedback data
-        dataset = Dataset.from_list(feedback_data)
-        # Push to Hub
-        dataset.push_to_hub(
             DATASET_REPO,
-            private=True  # Set to False if you want the dataset to be public
         )
-        print(f"Feedback data pushed to {DATASET_REPO} successfully.")
-        return True
-    except Exception as e:
-        print(f"Error pushing feedback data to Hub: {e}")
-        return False
-# Modified predict function to update conversation state
 @spaces.GPU(duration=120)
-def predict(message, history, state, temperature, top_p):
-    # Update history with user message
     history.append({"role": "user", "content": message})
-    input_text = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
-    # Create a streamer
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    # Set up generation parameters
-    generation_kwargs = {
-        "input_ids": inputs,
-        "max_new_tokens": 1024,
-        "temperature": float(temperature),
-        "top_p": float(top_p),
-        "do_sample": True,
-        "streamer": streamer,
-    }
-    # Run generation in a separate thread
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-    # Yield from the streamer as tokens are generated
-    partial_text = ""
-    for new_text in streamer:
-        partial_text += new_text
-        yield partial_text, state
-    # After full generation, update state with assistant's response
-    history.append({"role": "assistant", "content": partial_text})
-    state = history.copy()
-    return partial_text, state
-# Function to handle the research feedback submission
-def submit_research_feedback(conversation_state, satisfaction, feedback_text):
-    """Save user feedback both locally and to HuggingFace Hub"""
-    # Save locally first
-    feedback_id = save_feedback_locally(conversation_state, satisfaction, feedback_text)
-    # Get token from environment variable
-    env_token = os.environ.get("HF_TOKEN")
-    # Use environment token
-    push_success = push_feedback_to_hub(env_token)
-    if push_success:
-        status_msg = "Thank you for your valuable feedback! Your insights have been saved to the dataset."
-    else:
-        status_msg = "Thank you for your feedback! It has been saved locally, but couldn't be pushed to the dataset. Please check server logs."
-    return status_msg
-# Create the Gradio blocks interface
-with gr.Blocks() as demo:
-    # State to track conversation history
     conversation_state = gr.State([])
     with gr.Row():
         with gr.Column(scale=3):
-            # Custom chat function wrapper to update state
-            def chat_with_state(message, history, state, temperature, top_p):
-                for partial_response, updated_state in predict(message, history, state, temperature, top_p):
-                    # Update our state with each yield
-                    state = updated_state
-                    yield partial_response, state
-            # Create ChatInterface
             chatbot = gr.ChatInterface(
-                chat_with_state,
-                additional_inputs=[
-                    conversation_state,
-                    gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
-                    gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
-                ],
                 additional_outputs=[conversation_state],
-                type="messages"
             )
         with gr.Column(scale=1):
-            report_button = gr.Button("Share Feedback", variant="primary")
-    # Create the modal with feedback form components
-    with Modal(visible=False) as feedback_modal:
-        with gr.Column():
-            gr.Markdown("## Research Preview Feedback")
-            gr.Markdown("Thank you for testing our research model. Your feedback (positive or negative) helps us improve!")
-            satisfaction = gr.Radio(
-                ["Very satisfied", "Satisfied", "Neutral", "Unsatisfied", "Very unsatisfied"],
-                label="How would you rate your experience with this research model?",
-                value="Neutral"
-            )
-            feedback_text = gr.Textbox(
-                lines=5,
-                label="Share your observations (strengths, weaknesses, suggestions):",
-                placeholder="We welcome both positive feedback and constructive criticism to help improve this research prototype..."
-            )
-            submit_button = gr.Button("Submit Research Feedback", variant="primary")
-            response_text = gr.Textbox(label="Status", interactive=False)
-    # Connect the "Share Feedback" button to show the modal
-    report_button.click(
-        lambda: Modal(visible=True),
-        None,
-        feedback_modal
-    )
-    # Connect the submit button to the submit_research_feedback function with the current conversation state
-    submit_button.click(
-        submit_research_feedback,
-        inputs=[conversation_state, satisfaction, feedback_text],
-        outputs=response_text
     )
-# Launch the demo
-demo.launch()

+#!/usr/bin/env python
+"""HF Space for the *Marin‑8B‑Instruct* research preview
+-----------------------------------------------------
+A lightweight Gradio interface that
+  • streams chat completions from the `marin-community/marin-8b-instruct` model
+  • lets testers submit structured feedback (UX rating + free‑text)
+  • appends feedback to a local JSONL *and* merges it into a private Hub dataset
+The dataset is never overwritten: we always pull, merge, deduplicate, and push.
+"""
+from __future__ import annotations
+# ‑‑ standard lib
 import json
+import os
 import time
+import uuid
+from threading import Thread
+# ‑‑ third‑party deps (declared in requirements.txt of the Space)
+import gradio as gr
 from gradio_modal import Modal
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer,
+)
+from datasets import Dataset, load_dataset, concatenate_datasets, DownloadMode
+from huggingface_hub import HfApi, login
+import spaces
+# ──────────────────────────── model & constants ─────────────────────────────
 checkpoint = "marin-community/marin-8b-instruct"
+device = "cuda"  # the Space runner gives us a GPU
+# load tokenizer and model weights at import time; the model is moved to `device`
 tokenizer = AutoTokenizer.from_pretrained(checkpoint)
 model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
+# feedback dataset details
+DATASET_REPO = "WillHeld/model-feedback"  # Hub dataset repo id; change to your namespace if needed
+DATA_DIR = "./feedback_data"  # local directory that holds the feedback file
+DATA_FILE = "feedback.jsonl"  # one JSON record per line
+os.makedirs(DATA_DIR, exist_ok=True)  # make sure the directory exists before any write
+# ──────────────────────────── helpers ───────────────────────────────────────
+def save_feedback_locally(conversation: list[dict[str, str]],
+                          satisfaction: str,
+                          feedback_text: str) -> str:
+    """Append a single feedback record to a JSONL file and return its UUID."""
+    record = {
+        "id": str(uuid.uuid4()),
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
         "conversation": conversation,
         "satisfaction": satisfaction,
+        "feedback": feedback_text,
     }
+    fp = os.path.join(DATA_DIR, DATA_FILE)
+    with open(fp, "a", encoding="utf‑8") as f:
+        f.write(json.dumps(record, ensure_ascii=False) + "\n")
+    return record["id"]
+def push_feedback_to_hub(hf_token: str | None = None) -> bool:  # noqa: C901
+    """Merge freshly collected feedback with what’s already on the Hub.
+    Steps
+    -----
+    1.  Authenticate with `hf_token` (fall back to $HF_TOKEN env).
+    2.  Load *local* feedback just written in `feedback.jsonl`.
+    3.  Pull existing remote split (if any); concat & `unique("id")`.
+    4.  Push the merged dataset back. Never deletes remote shards ⇒ safe.
+    """
+    hf_token = hf_token or os.getenv("HF_TOKEN")
+    if not hf_token:
+        print("❌  No HF token — skipping Hub push.")
+        return False
+    login(token=hf_token)
+    fp = os.path.join(DATA_DIR, DATA_FILE)
+    if not os.path.exists(fp):
+        print("❌  Local feedback file missing; nothing to push.")
+        return False
+    # local rows → Dataset
+    with open(fp, encoding="utf‑8") as f:
+        local_ds = Dataset.from_list([json.loads(l) for l in f])
+    # try to pull remote
     try:
+        remote_ds = load_dataset(
             DATASET_REPO,
+            split="train",
+            token=hf_token,
+            download_mode=DownloadMode.FORCE_REDOWNLOAD,
         )
+        merged = concatenate_datasets([remote_ds, local_ds]).unique("id")
+    except FileNotFoundError:
+        # repo exists but empty
+        merged = local_ds
+    except Exception:
+        # repo may not exist yet – create & start fresh
+        HfApi(token=hf_token).create_repo(
+            repo_id=DATASET_REPO, repo_type="dataset", private=True
+        )
+        merged = local_ds
+    merged.push_to_hub(
+        DATASET_REPO,
+        private=True,
+        commit_message=f"Add {len(local_ds)} new feedback entries",
+    )
+    print(
+        f"✅  Pushed {len(local_ds)} rows; dataset now has {len(merged)} total.")
+    # (optional) clear local file once synced
+    # os.remove(fp)
+    return True
+# ──────────────────────────── chat backend ─────────────────────────────────
 @spaces.GPU(duration=120)
+def generate_response(message: str,
+                      history: list[dict[str, str]],
+                      temperature: float,
+                      top_p: float):
+    """Streaming generator used by the Gradio ChatInterface."""
+    # 1) add user message to history
     history.append({"role": "user", "content": message})
+    # 2) build model input via chat template
+    prompt = tokenizer.apply_chat_template(history, tokenize=False,
+                                           add_generation_prompt=True)
+    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
+                                    skip_special_tokens=True)
+    gen_kwargs = dict(
+        input_ids=input_ids,
+        max_new_tokens=1024,
+        temperature=float(temperature),
+        top_p=float(top_p),
+        do_sample=True,
+        streamer=streamer,
+    )
+    # run on a worker thread so we can yield tokens live
+    Thread(target=model.generate, kwargs=gen_kwargs).start()
+    partial = ""
+    for token in streamer:
+        partial += token
+        yield partial, history  # 1st out = msg, 2nd out = state
+    # once finished, commit assistant reply to history
+    history.append({"role": "assistant", "content": partial})
+    yield partial, history
+# ──────────────────────────── feedback handler ─────────────────────────────
+def submit_feedback(conversation_state: list[dict[str, str]],
+                    satisfaction: str,
+                    feedback_text: str):
+    """Callback for the *Submit Research Feedback* button."""
+    save_feedback_locally(conversation_state, satisfaction, feedback_text)
+    pushed = push_feedback_to_hub()
+    if pushed:
+        return "✅  Thanks! Your feedback is safely stored."
+    return "⚠️  Saved locally; Hub push failed. Check server logs."
+# ──────────────────────────── UI layout ────────────────────────────────────
+with gr.Blocks(title="Marin‑8B Research Preview") as demo:
+    # state object to surface chat history to the feedback form
     conversation_state = gr.State([])
     with gr.Row():
+        # ——— Chat column ———
         with gr.Column(scale=3):
             chatbot = gr.ChatInterface(
+                fn=generate_response,
+                additional_inputs=[conversation_state,  # keeps state in sync
+                                   gr.Slider(0.1, 2.0, value=0.7, step=0.1,
+                                             label="Temperature"),
+                                   gr.Slider(0.1, 1.0, value=0.9, step=0.05,
+                                             label="Top‑P")],
                 additional_outputs=[conversation_state],
+                type="messages",
             )
+        # ——— Sidebar column ———
         with gr.Column(scale=1):
+            report_btn = gr.Button("Share Feedback", variant="primary")
+    # feedback modal (hidden by default)
+    with Modal(visible=False) as fb_modal:
+        gr.Markdown("## Research Preview Feedback")
+        gr.Markdown("We appreciate your help improving Marin‑8B! ✨")
+        sat_radio = gr.Radio([
+            "Very satisfied", "Satisfied", "Neutral",
+            "Unsatisfied", "Very unsatisfied"],
+            label="Overall experience",
+            value="Neutral",
+        )
+        fb_text = gr.Textbox(lines=6, label="Comments / suggestions")
+        send_btn = gr.Button("Submit", variant="primary")
+        status_box = gr.Textbox(label="Status", interactive=False)
+    # interactions
+    report_btn.click(lambda: None, None, None, _js="() => window.modal_open()")
+    # the JS helper above relies on gradio‑modal’s injected helper.
+    send_btn.click(
+        submit_feedback,
+        inputs=[conversation_state, sat_radio, fb_text],
+        outputs=status_box,
     )
+# ──────────────────────────── run! ─────────────────────────────────────────
+if __name__ == "__main__":
+    demo.launch()