Update app.py

app.py CHANGED
@@ -5,7 +5,7 @@ from threading import Thread
 import os
 import json
 import uuid
-from datasets import Dataset
+from datasets import Dataset, load_dataset
 from huggingface_hub import HfApi, login
 import time
 
@@ -28,6 +28,63 @@ DATASET_FILENAME = "feedback.jsonl" # Filename for feedback data
 # Ensure feedback directory exists
 os.makedirs(DATASET_PATH, exist_ok=True)
 
+# Sync existing dataset from Hub if available
+def sync_dataset_from_hub():
+    """Download existing dataset from Hub and merge with local data"""
+    try:
+        # Try to get token from environment variable
+        hf_token = os.environ.get("HF_TOKEN")
+        if hf_token:
+            login(token=hf_token)
+
+        # Check if the dataset exists on Hub
+        api = HfApi()
+        try:
+            dataset_info = api.dataset_info(DATASET_REPO)
+            # Dataset exists, download it
+            print(f"Syncing existing dataset from {DATASET_REPO}")
+            remote_dataset = load_dataset(DATASET_REPO)
+
+            # Convert to list of dictionaries
+            remote_data = [item for item in remote_dataset['train']]
+
+            # Check if local file exists
+            local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
+            local_data = []
+
+            if os.path.exists(local_file):
+                # Read local data
+                with open(local_file, 'r') as f:
+                    for line in f:
+                        try:
+                            local_data.append(json.loads(line))
+                        except json.JSONDecodeError:
+                            continue
+
+            # Merge data (using IDs to avoid duplicates)
+            all_items = {}
+            for item in remote_data + local_data:
+                all_items[item['id']] = item
+
+            # Write back merged data
+            with open(local_file, 'w') as f:
+                for item in all_items.values():
+                    f.write(json.dumps(item) + '\n')
+
+            print(f"Synced {len(all_items)} feedback items")
+            return True
+
+        except Exception as e:
+            print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
+            return False
+
+    except Exception as e:
+        print(f"Error syncing dataset: {e}")
+        return False
+
+# Call sync on startup
+sync_dataset_from_hub()
+
 # Feedback storage functions
 def save_feedback_locally(conversation, satisfaction, feedback_text):
     """Save feedback to a local JSONL file"""
@@ -96,13 +153,18 @@ def push_feedback_to_hub(hf_token=None):
 # Modified predict function to update conversation state
 @spaces.GPU(duration=120)
 def predict(message, history, state, temperature, top_p):
+    # Create a deep copy of history to ensure we don't modify the original
+    current_history = history.copy()
+
     # Update history with user message
-    history.append({"role": "user", "content": message})
+    current_history.append({"role": "user", "content": message})
 
-    # Update the conversation state
-    state = history.copy()
+    # Update the conversation state with user message
+    if not state:
+        state = []
+    state = current_history.copy()
 
-    input_text = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+    input_text = tokenizer.apply_chat_template(current_history, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
 
     # Create a streamer
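Note: the prompt is now built from the copied history with apply_chat_template. A small sketch of what that call produces, assuming any instruct model whose tokenizer ships a chat template; the checkpoint below is only an example, not the one this Space loads:

```python
from transformers import AutoTokenizer

# Example checkpoint only; the Space's actual model is not shown in this diff.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")

history = [{"role": "user", "content": "Hello!"}]

# tokenize=False returns the formatted prompt as a string;
# add_generation_prompt=True appends the assistant header so the model
# continues in the assistant's turn.
input_text = tokenizer.apply_chat_template(
    history, tokenize=False, add_generation_prompt=True
)
print(input_text)
```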
@@ -127,11 +189,22 @@ def predict(message, history, state, temperature, top_p):
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
-        yield partial_text, state
+
+        # Create a temporary state with partial response
+        temp_history = current_history.copy()
+        temp_history.append({"role": "assistant", "content": partial_text})
+        temp_state = temp_history.copy()
+
+        yield partial_text, temp_state
+
+    # After full generation, update state with assistant's final response
+    current_history.append({"role": "assistant", "content": partial_text})
+    state = current_history.copy()
+
+    # Print debug info
+    print(f"Updated state with {len(state)} messages")
+    print(f"Last message: {state[-1]['role']}: {state[-1]['content'][:30]}...")
 
-    # After full generation, update state with assistant's response
-    history.append({"role": "assistant", "content": partial_text})
-    state = history.copy()
     return partial_text, state
 
 # Function to handle the research feedback submission
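Note: this loop consumes a streamer created earlier in predict (only the "# Create a streamer" comment is visible in the diff). For context, the usual transformers pattern pairs TextIteratorStreamer with model.generate on a background thread, which matches the "from threading import Thread" import in the first hunk. A hedged sketch, with model, tokenizer, and the generation cap assumed rather than taken from the Space:

```python
from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, tokenizer, inputs, temperature, top_p):
    """Yield the growing reply string as tokens arrive from the model."""
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs=inputs,
        streamer=streamer,
        max_new_tokens=512,  # assumed cap; the Space's actual value is not in the diff
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # generate() blocks, so it runs on a thread while we consume the streamer
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text
```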
@@ -164,8 +237,8 @@ with gr.Blocks() as demo:
     def chat_with_state(message, history, state, temperature, top_p):
         for partial_response, updated_state in predict(message, history, state, temperature, top_p):
             # Update our state with each yield
-            state = updated_state
-            yield partial_response
+            conversation_state.value = updated_state
+            yield partial_response
 
     # Create ChatInterface
     chatbot = gr.ChatInterface(
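Note: assigning conversation_state.value mutates the component's default rather than its per-session value, so this works for a single user but can leak state across concurrent sessions. The conventional Gradio pattern threads gr.State through as an explicit input and output; a minimal sketch, where the respond handler is a placeholder, not the Space's predict:

```python
import gradio as gr

def respond(message, state):
    """Placeholder handler; the real app would call the model here."""
    state = (state or []) + [{"role": "user", "content": message}]
    reply = f"You said: {message}"
    state = state + [{"role": "assistant", "content": reply}]
    return reply, state

with gr.Blocks() as demo:
    conversation_state = gr.State([])  # one value per user session
    box = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")
    # State goes in as an input and comes back as an output
    box.submit(respond, [box, conversation_state], [out, conversation_state])

# demo.launch()
```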
@@ -175,7 +248,6 @@ with gr.Blocks() as demo:
             gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
             gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
         ],
-        additional_outputs=[conversation_state],
         type="messages"
     )
 
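Note: the diff references push_feedback_to_hub in a hunk header without showing its body. For the reverse direction of the sync added above, an upload typically looks like the sketch below with HfApi.upload_file; the function name and signature are assumptions, and only the constants mirror the ones defined in the file:

```python
import os
from huggingface_hub import HfApi

def push_feedback(dataset_repo, dataset_path, dataset_filename, hf_token=None):
    """Upload the local feedback JSONL file to the dataset repo on the Hub."""
    api = HfApi(token=hf_token)
    api.upload_file(
        path_or_fileobj=os.path.join(dataset_path, dataset_filename),
        path_in_repo=dataset_filename,
        repo_id=dataset_repo,
        repo_type="dataset",
    )
```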