yulongchen committed on
Commit
fcd14e1
·
1 Parent(s): 44ef3dd

Add system

Browse files
app.py CHANGED
@@ -169,18 +169,17 @@ def run_model():
169
  events = df.to_dict(orient="records")
170
  log_entry = {
171
  "requested_time": timestamp,
 
172
  "pledge": claim,
173
  "suggestion_meta": suggestion_meta,
174
- "user_id": user_id,
 
175
  "pledge_author": pledge_author,
176
  "pledge_date": pledge_date,
177
  "events": events
178
  }
179
  default_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
180
- # step_id = outputs["step_id"]
181
- # if update_status:
182
- # update_status(step_id, "All done!")
183
- # step_id += 1
184
  with open(default_log_path, "w") as f:
185
  f.write(json.dumps(log_entry, indent=1))
186
 
@@ -279,62 +278,6 @@ def receive_feedback():
279
 
280
  return jsonify({'status': 'success'})
281
 
282
- # @app.route("/api/feedback", methods=["POST"])
283
- # def receive_feedback():
284
- # data = request.get_json()
285
- # pledge = data.get("pledge", "no_pledge_text")
286
- # feedback_list = data.get("feedback", [])
287
- # filename = data.get("file")
288
- # file_path = os.path.join(TMP_DIR, filename)
289
- # pledge_date = data.get("pledge_date", "")
290
- # pledge_author = data.get("pledge_author", "")
291
-
292
- # if not os.path.exists(file_path):
293
- # return jsonify({"error": "Event file not found"}), 400
294
-
295
- # with open(file_path, "r") as f:
296
- # events = json.load(f)
297
-
298
- # feedback_dict = {int(item['eventIndex']): item['answer'] for item in feedback_list}
299
-
300
- # for idx, event in enumerate(events):
301
- # event["user_feedback"] = feedback_dict.get(idx)
302
-
303
- # log_entry = {
304
- # "requested_time": data.get("timestamp"),
305
- # "user_id": data.get("user_id"),
306
- # "pledge": pledge,
307
- # "pledge_author": pledge_author,
308
- # "pledge_date": pledge_date,
309
- # "events": events
310
- # }
311
-
312
- # timestamp = data.get("timestamp")
313
- # user_id = data.get("user_id")
314
- # timestamp = data.get("timestamp")
315
-
316
- # if not user_id or not timestamp:
317
- # return jsonify({'status': 'error', 'detail': 'Missing user_id or timestamp'}), 400
318
-
319
- # local_filename = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
320
-
321
- # with open(local_filename, "w") as f:
322
- # f.write(json.dumps(log_entry, indent=1))
323
-
324
- # try:
325
- # api = HfApi()
326
- # api.upload_file(
327
- # path_or_fileobj=local_filename,
328
- # path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl",
329
- # repo_id=HF_DATASET_REPO,
330
- # repo_type="dataset",
331
- # token=HF_TOKEN
332
- # )
333
- # except Exception as e:
334
- # return jsonify({'status': 'partial_success', 'error': str(e)}), 500
335
-
336
- # return jsonify({'status': 'success'})
337
-
338
  @app.route("/download-feedback/<filename>")
339
  def download_feedback_file(filename):
340
  return send_from_directory(FEEDBACK_DIR, filename, as_attachment=True)
 
169
  events = df.to_dict(orient="records")
170
  log_entry = {
171
  "requested_time": timestamp,
172
+ "user_id": user_id,
173
  "pledge": claim,
174
  "suggestion_meta": suggestion_meta,
175
+ "time_start": time_start,
176
+ "time_end": time_end,
177
  "pledge_author": pledge_author,
178
  "pledge_date": pledge_date,
179
  "events": events
180
  }
181
  default_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
182
+
 
 
 
183
  with open(default_log_path, "w") as f:
184
  f.write(json.dumps(log_entry, indent=1))
185
 
 
278
 
279
  return jsonify({'status': 'success'})
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  @app.route("/download-feedback/<filename>")
282
  def download_feedback_file(filename):
283
  return send_from_directory(FEEDBACK_DIR, filename, as_attachment=True)
system/augmented_searching.py CHANGED
@@ -8,7 +8,7 @@ from pathlib import Path
8
  import spacy
9
 
10
  def google_search(query, api_key, search_engine_id, start_date, end_date):
11
- print(f"[SYSTEM] Calling Google Search API for: {query}")
12
  sort = f"date:r:{start_date}:{end_date}"
13
  url = "https://www.googleapis.com/customsearch/v1"
14
  params = {
@@ -56,7 +56,6 @@ def run_augmented_searching(qa_file, pipeline_base_dir, suggestion_meta, pledge_
56
  qa_lines = open(f"{qa_file}","r").readlines()[idx]
57
  qa_lines = json.loads(qa_lines)
58
  claim_text = f"{qa_lines['claim']}"
59
- print(qa_lines)
60
 
61
 
62
  api_key = os.environ.get("GOOGLE_API_KEY")
@@ -79,7 +78,6 @@ def run_augmented_searching(qa_file, pipeline_base_dir, suggestion_meta, pledge_
79
 
80
 
81
  results = google_search(claim_text, api_key, search_engine_id, start_date, end_date)
82
- print(results)
83
  for result in results:
84
  if result["link"] not in urls and "fullfact.org/government-tracker" not in result["link"]:
85
  string_values.append("claim")
 
8
  import spacy
9
 
10
  def google_search(query, api_key, search_engine_id, start_date, end_date):
11
+ # print(f"[SYSTEM] Calling Google Search API for: {query}")
12
  sort = f"date:r:{start_date}:{end_date}"
13
  url = "https://www.googleapis.com/customsearch/v1"
14
  params = {
 
56
  qa_lines = open(f"{qa_file}","r").readlines()[idx]
57
  qa_lines = json.loads(qa_lines)
58
  claim_text = f"{qa_lines['claim']}"
 
59
 
60
 
61
  api_key = os.environ.get("GOOGLE_API_KEY")
 
78
 
79
 
80
  results = google_search(claim_text, api_key, search_engine_id, start_date, end_date)
 
81
  for result in results:
82
  if result["link"] not in urls and "fullfact.org/government-tracker" not in result["link"]:
83
  string_values.append("claim")
system/baseline/reranking_optimized.py CHANGED
@@ -78,10 +78,10 @@ def select_top_k(claim, results, top_k):
78
  top_k_sentences_urls = []
79
 
80
  i = 0
81
- print(results)
82
  claim = remove_special_chars_except_spaces(claim).lower()
83
  while len(top_k_sentences_urls) < top_k and i < len(results):
84
- print(i)
85
  sentence = remove_special_chars_except_spaces(results[i]['sentence']).lower()
86
 
87
  if sentence not in dup_check:
 
78
  top_k_sentences_urls = []
79
 
80
  i = 0
81
+ # print(results)
82
  claim = remove_special_chars_except_spaces(claim).lower()
83
  while len(top_k_sentences_urls) < top_k and i < len(results):
84
+ # print(i)
85
  sentence = remove_special_chars_except_spaces(results[i]['sentence']).lower()
86
 
87
  if sentence not in dup_check:
system/ee.py CHANGED
@@ -49,7 +49,7 @@ def run_gpt4_event_extraction(data_dir, max_tokens=100000):
49
 
50
  output_path = os.path.join(output_dir, f"gpt4o_results_{ID}_claim.json")
51
  if os.path.exists(output_path):
52
- print(f"输出已存在 {output_path}")
53
 
54
  else:
55
 
@@ -71,7 +71,7 @@ def run_gpt4_event_extraction(data_dir, max_tokens=100000):
71
 
72
  try:
73
  output = gpt_4o(input_text)
74
- print(f"GPT-4o Response: {output}")
75
  results.append({
76
  "url": doc["url"],
77
  "title": doc["metadata"]["title"],
 
49
 
50
  output_path = os.path.join(output_dir, f"gpt4o_results_{ID}_claim.json")
51
  if os.path.exists(output_path):
52
+ print(f"Already exist: {output_path}")
53
 
54
  else:
55
 
 
71
 
72
  try:
73
  output = gpt_4o(input_text)
74
+ # print(f"GPT-4o Response: {output}")
75
  results.append({
76
  "url": doc["url"],
77
  "title": doc["metadata"]["title"],
system/initial_searching.py CHANGED
@@ -11,7 +11,6 @@ import subprocess
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
  except OSError:
14
- print("🔁 Downloading en_core_web_sm model ...")
15
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
16
  nlp = spacy.load("en_core_web_sm")
17
 
 
11
  try:
12
  nlp = spacy.load("en_core_web_sm")
13
  except OSError:
 
14
  subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
15
  nlp = spacy.load("en_core_web_sm")
16
 
system/process_time.py CHANGED
@@ -223,7 +223,7 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
223
  else:
224
  event_date_and_pub_date = original_date
225
 
226
- test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful?"
227
 
228
  # print(test_instance)
229
 
 
223
  else:
224
  event_date_and_pub_date = original_date
225
 
226
+ test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful to track the fulfilment of this pledge"
227
 
228
  # print(test_instance)
229