Update app.py

app.py CHANGED
@@ -5,7 +5,7 @@ from threading import Thread
 import os
 import json
 import uuid
-from datasets import Dataset
+from datasets import Dataset, load_dataset
 from huggingface_hub import HfApi, login
 import time
 
@@ -28,6 +28,63 @@ DATASET_FILENAME = "feedback.jsonl" # Filename for feedback data
 # Ensure feedback directory exists
 os.makedirs(DATASET_PATH, exist_ok=True)
 
+# Sync existing dataset from Hub if available
+def sync_dataset_from_hub():
+    """Download existing dataset from Hub and merge with local data"""
+    try:
+        # Try to get token from environment variable
+        hf_token = os.environ.get("HF_TOKEN")
+        if hf_token:
+            login(token=hf_token)
+
+        # Check if the dataset exists on Hub
+        api = HfApi()
+        try:
+            dataset_info = api.dataset_info(DATASET_REPO)
+            # Dataset exists, download it
+            print(f"Syncing existing dataset from {DATASET_REPO}")
+            remote_dataset = load_dataset(DATASET_REPO)
+
+            # Convert to list of dictionaries
+            remote_data = [item for item in remote_dataset['train']]
+
+            # Check if local file exists
+            local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
+            local_data = []
+
+            if os.path.exists(local_file):
+                # Read local data
+                with open(local_file, 'r') as f:
+                    for line in f:
+                        try:
+                            local_data.append(json.loads(line))
+                        except json.JSONDecodeError:
+                            continue
+
+            # Merge data (using IDs to avoid duplicates)
+            all_items = {}
+            for item in remote_data + local_data:
+                all_items[item['id']] = item
+
+            # Write back merged data
+            with open(local_file, 'w') as f:
+                for item in all_items.values():
+                    f.write(json.dumps(item) + '\n')
+
+            print(f"Synced {len(all_items)} feedback items")
+            return True
+
+        except Exception as e:
+            print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
+            return False
+
+    except Exception as e:
+        print(f"Error syncing dataset: {e}")
+        return False
+
+# Call sync on startup
+sync_dataset_from_hub()
+
 # Feedback storage functions
 def save_feedback_locally(conversation, satisfaction, feedback_text):
     """Save feedback to a local JSONL file"""
@@ -96,13 +153,18 @@ def push_feedback_to_hub(hf_token=None):
 # Modified predict function to update conversation state
 @spaces.GPU(duration=120)
 def predict(message, history, state, temperature, top_p):
+    # Create a deep copy of history to ensure we don't modify the original
+    current_history = history.copy()
+
     # Update history with user message
-    history.append({"role": "user", "content": message})
+    current_history.append({"role": "user", "content": message})
 
-    # Update the conversation state
-    state = history.copy()
+    # Update the conversation state with user message
+    if not state:
+        state = []
+    state = current_history.copy()
 
-    input_text = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+    input_text = tokenizer.apply_chat_template(current_history, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
 
     # Create a streamer
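Note: the prompt is now built from the copied history with apply_chat_template. A small sketch of what that call produces, assuming any instruct model whose tokenizer ships a chat template; the checkpoint below is only an example, not the one this Space loads:

```python
from transformers import AutoTokenizer

# Example checkpoint only; the Space's actual model is not shown in this diff.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")

history = [{"role": "user", "content": "Hello!"}]

# tokenize=False returns the formatted prompt as a string;
# add_generation_prompt=True appends the assistant header so the model
# continues in the assistant's turn.
input_text = tokenizer.apply_chat_template(
    history, tokenize=False, add_generation_prompt=True
)
print(input_text)
```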
@@ -127,11 +189,22 @@ def predict(message, history, state, temperature, top_p):
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
-        yield partial_text, state
+
+        # Create a temporary state with partial response
+        temp_history = current_history.copy()
+        temp_history.append({"role": "assistant", "content": partial_text})
+        temp_state = temp_history.copy()
+
+        yield partial_text, temp_state
+
+    # After full generation, update state with assistant's final response
+    current_history.append({"role": "assistant", "content": partial_text})
+    state = current_history.copy()
+
+    # Print debug info
+    print(f"Updated state with {len(state)} messages")
+    print(f"Last message: {state[-1]['role']}: {state[-1]['content'][:30]}...")
 
-    # After full generation, update state with assistant's response
-    history.append({"role": "assistant", "content": partial_text})
-    state = history.copy()
     return partial_text, state
 
 # Function to handle the research feedback submission
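Note: this loop consumes a streamer created earlier in predict (only the "# Create a streamer" comment is visible in the diff). For context, the usual transformers pattern pairs TextIteratorStreamer with model.generate on a background thread, which matches the "from threading import Thread" import in the first hunk. A hedged sketch, with model, tokenizer, and the generation cap assumed rather than taken from the Space:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, inputs, temperature, top_p):
    """Yield the growing reply string as tokens arrive from the model."""
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs=inputs,
        streamer=streamer,
        max_new_tokens=512,  # assumed cap; the Space's actual value is not in the diff
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # generate() blocks, so it runs on a thread while we consume the streamer
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
```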
@@ -164,8 +237,8 @@ with gr.Blocks() as demo:
     def chat_with_state(message, history, state, temperature, top_p):
         for partial_response, updated_state in predict(message, history, state, temperature, top_p):
             # Update our state with each yield
-            state = updated_state
-            yield partial_response
+            conversation_state.value = updated_state
+            yield partial_response
 
     # Create ChatInterface
     chatbot = gr.ChatInterface(
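Note: assigning conversation_state.value mutates the component's default rather than its per-session value, so this works for a single user but can leak state across concurrent sessions. The conventional Gradio pattern threads gr.State through as an explicit input and output; a minimal sketch, where the respond handler is a placeholder, not the Space's predict:

```python
import gradio as gr

def respond(message, state):
    """Placeholder handler; the real app would call the model here."""
    state = (state or []) + [{"role": "user", "content": message}]
    reply = f"You said: {message}"
    state = state + [{"role": "assistant", "content": reply}]
    return reply, state

with gr.Blocks() as demo:
    conversation_state = gr.State([])  # one value per user session
    box = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    # State goes in as an input and comes back as an output
    box.submit(respond, [box, conversation_state], [out, conversation_state])

# demo.launch()
```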
@@ -175,7 +248,6 @@ with gr.Blocks() as demo:
             gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
             gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
         ],
-        additional_outputs=[conversation_state],
         type="messages"
     )
 
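Note: the diff references push_feedback_to_hub in a hunk header without showing its body. For the reverse direction of the sync added above, an upload typically looks like the sketch below with HfApi.upload_file; the function name and signature are assumptions, and only the constants mirror the ones defined in the file:

```python
import os
from huggingface_hub import HfApi

def push_feedback(dataset_repo, dataset_path, dataset_filename, hf_token=None):
    """Upload the local feedback JSONL file to the dataset repo on the Hub."""
    api = HfApi(token=hf_token)
    api.upload_file(
        path_or_fileobj=os.path.join(dataset_path, dataset_filename),
        path_in_repo=dataset_filename,
        repo_id=dataset_repo,
        repo_type="dataset",
    )
```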