from flask import Flask, jsonify, send_file, request, send_from_directory
from flask_cors import CORS
import os
import json
import uuid
import time
import threading
import traceback
from datetime import datetime, timedelta

import pandas as pd
import spacy
from huggingface_hub import HfApi, hf_hub_download
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from system.pledge_tracking import run_pipeline

nlp = spacy.load("en_core_web_sm")

app = Flask(__name__, static_folder='.')
CORS(app)

HF_DATASET_REPO = "PledgeTracker/demo_feedback"
HF_TOKEN = os.environ.get("HF_TOKEN")

TMP_DIR = "tmp"
FEEDBACK_DIR = "feedback_logs"
os.makedirs(TMP_DIR, exist_ok=True)
os.makedirs(FEEDBACK_DIR, exist_ok=True)

# Reference pledges used by /api/similar-pledges, pulled from the feedback dataset repo.
REFERENCE_PLEDGES = []
REFERENCE_PLEDGE_PATH = hf_hub_download(
    repo_id=HF_DATASET_REPO,
    filename="existing_pledges.txt",
    repo_type="dataset",
    token=HF_TOKEN,
)
if os.path.exists(REFERENCE_PLEDGE_PATH):
    with open(REFERENCE_PLEDGE_PATH, "r") as f:
        REFERENCE_PLEDGES = [line.strip() for line in f if line.strip()]
else:
    print(f"Missing reference pledge file: {REFERENCE_PLEDGE_PATH}")


def lemmatize(text):
    doc = nlp(text)
    return " ".join([token.lemma_ for token in doc if not token.is_punct and not token.is_space])


@app.route("/api/similar-pledges", methods=["POST"])
def similar_pledges():
    data = request.get_json()
    claim = data.get("claim", "").strip()
    if not claim or not REFERENCE_PLEDGES:
        return jsonify({"suggestions": []})

    # Rank reference pledges by TF-IDF cosine similarity to the submitted claim.
    all_pledges = [claim] + REFERENCE_PLEDGES
    lemmatized_pledges = [lemmatize(p) for p in all_pledges]
    tfidf_matrix = TfidfVectorizer().fit_transform(lemmatized_pledges)
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    # Keep at most five suggestions above a 0.3 similarity threshold.
    filtered = [(i, similarities[i]) for i in range(len(similarities)) if similarities[i] > 0.3]
    top_filtered = sorted(filtered, key=lambda x: x[1], reverse=True)[:5]
    suggestions = [
        {"text": REFERENCE_PLEDGES[i], "index": int(i)}
        for i, score in top_filtered
    ]
    return jsonify({"suggestions": suggestions})


def calculate_time_range(option: str, pledge_date=None):
    """Return (start, end) as YYYYMMDD strings; the start date never precedes the pledge date."""
    today = datetime.today()
    if isinstance(pledge_date, str):
        pledge_date = datetime.strptime(pledge_date, "%Y-%m-%d")
    elif not isinstance(pledge_date, datetime):
        raise ValueError("pledge_date must be a str or datetime")

    if option == "week":
        one_week_ago = today - timedelta(days=7)
        start = max(one_week_ago, pledge_date)
    elif option == "month":
        one_month_ago = today - timedelta(days=30)
        start = max(one_month_ago, pledge_date)
    elif option == "since_pledge_date":
        start = pledge_date
    else:
        raise ValueError("Invalid time range option")

    return start.strftime("%Y%m%d"), today.strftime("%Y%m%d")


@app.route("/")
def serve_html():
    return send_from_directory('.', 'test.html')


@app.route("/api/status")
def check_status():
    user_id = request.args.get("user_id")
    timestamp = request.args.get("timestamp")
    log_file_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}_status.log")
    if not os.path.exists(log_file_path):
        return jsonify({"status": {}}), 200
    try:
        with open(log_file_path, "r") as f:
            status = json.load(f)
    except Exception:
        status = {}
    return jsonify({"status": status})
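
# Illustrative JSON payload for POST /api/run-model (a sketch inferred from the fields
# read in run_model() below; the example values are placeholders, not real data):
# {
#     "claim": "We will build 40 new hospitals",   # pledge text
#     "time_range": "month",                       # "week" | "month" | "since_pledge_date"
#     "pledge_date": "2024-07-04",                 # YYYY-MM-DD
#     "pledge_author": "Example Party",
#     "timestamp": "2025-01-01_12-00-00",          # optional; generated if omitted
#     "user_id": "ab12cd34",                       # optional; generated if omitted
#     "suggestion_meta": null                      # optional; forwarded unchanged to run_pipeline()
# }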
data.get("time_range", "month") system_start_time = datetime.now() suggestion_meta = data.get("suggestion_meta") pledge_date = data.get("pledge_date", "") pledge_author = data.get("pledge_author", "") timestamp = data.get("timestamp") or time.strftime("%Y-%m-%d_%H-%M-%S") user_id = data.get("user_id") or str(uuid.uuid4())[:8] log_file_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}_status.log") status_lock = threading.Lock() def update_status(step_id, msg): print(f"[STATUS] Step {step_id}: {msg}") with status_lock: if os.path.exists(log_file_path): try: with open(log_file_path, "r") as f: current = json.load(f) except Exception: current = {} else: current = {} current[str(step_id)] = f"{msg}" with open(log_file_path, "w") as f: json.dump(current, f, indent=2) try: time_start, time_end = calculate_time_range(time_range_option, pledge_date=pledge_date) print(f"[DEMO] Received claim: {claim}") print(f"[DEMO] Time range: {time_start} ~ {time_end}") print(f"[DEMO] Pledge date range: {pledge_date}") # user_id = str(uuid.uuid4())[:8] # outputs = run_pipeline(claim, pledge_date, pledge_author, time_start, timestamp, user_id) update_status(0, "📌 Starting the system ...") print(suggestion_meta) outputs = run_pipeline( claim, pledge_date, pledge_author, time_start, timestamp, user_id, update_fn=update_status, suggestion_meta=suggestion_meta ) df = pd.read_excel(outputs["sorted_events"]) json_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}.json") df.to_json(json_path, orient="records", indent=2) system_end_time = datetime.now() runtime = system_end_time - system_start_time events = df.to_dict(orient="records") log_entry = { "requested_time": timestamp, "user_id": user_id, "pledge": claim, "suggestion_meta": suggestion_meta, "time_start": time_start, "time_end": time_end, "runtime": runtime.total_seconds(), "pledge_author": pledge_author, "pledge_date": pledge_date, "events": events } default_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl" with open(default_log_path, "w") as f: f.write(json.dumps(log_entry, indent=1)) tsv_path = outputs["augmented_tsv_file"] try: api = HfApi() api.upload_file( path_or_fileobj=default_log_path, path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl", repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN ) api.upload_file( path_or_fileobj=tsv_path, path_in_repo=f"logs/augmented_{timestamp}_{user_id}.tsv", repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN ) except Exception as e: traceback.print_exc() print(f"[Default Feedback Upload Error] {e}") return jsonify({ "status": "success", "file": f"{timestamp}_{user_id}.json", "user_id": user_id, "timestamp": timestamp }) except Exception as e: traceback.print_exc() return jsonify({"status": "error", "detail": str(e)}), 500 @app.route("/api/events") def get_events(): filename = request.args.get("file") file_path = os.path.join(TMP_DIR, filename) if not os.path.exists(file_path): return jsonify({"error": "File not found"}), 404 with open(file_path, "r") as f: events = json.load(f) return jsonify(events) @app.route("/api/feedback", methods=["POST"]) def receive_feedback(): data = request.get_json() pledge = data.get("pledge", "no_pledge_text") feedback_list = data.get("feedback", []) filename = data.get("file") file_path = os.path.join(TMP_DIR, filename) timestamp = data.get("timestamp") user_id = data.get("user_id") if not user_id or not timestamp: return jsonify({'status': 'error', 'detail': 'Missing user_id or timestamp'}), 400 if not os.path.exists(file_path): return 
jsonify({"error": "Event file not found"}), 400 with open(file_path, "r") as f: events = json.load(f) suggestion_meta = None time_start = None time_end = None try: prev_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl" with open(prev_log_path, "r") as f: previous_log = json.load(f) suggestion_meta = previous_log.get("suggestion_meta") time_start = previous_log.get("time_start") time_end = previous_log.get("time_end") pledge_author = previous_log.get("pledge_author") pledge_date = previous_log.get("pledge_date") runtime = previous_log.get("runtime") except Exception: pass feedback_dict = {int(item['eventIndex']): item['answer'] for item in feedback_list} for idx, event in enumerate(events): event["user_feedback"] = feedback_dict.get(idx) log_entry = { "requested_time": timestamp, "user_id": user_id, "pledge": pledge, "suggestion_meta": suggestion_meta, "time_start": time_start, "time_end": time_end, "runtime": runtime, "pledge_author": pledge_author, "pledge_date": pledge_date, "events": events } local_filename = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl" with open(local_filename, "w") as f: f.write(json.dumps(log_entry, indent=1)) try: api = HfApi() api.upload_file( path_or_fileobj=local_filename, path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl", repo_id=HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN ) except Exception as e: return jsonify({'status': 'partial_success', 'error': str(e)}), 500 return jsonify({'status': 'success'}) @app.route("/download-feedback/") def download_feedback_file(filename): return send_from_directory(FEEDBACK_DIR, filename, as_attachment=True) @app.route("/feedback-files") def list_feedback_files(): files = os.listdir(FEEDBACK_DIR) return jsonify(sorted(files)) @app.route("/download") def download_excel(): file = request.args.get("file") if not file: return "Missing file param", 400 json_path = os.path.join(TMP_DIR, file) if not os.path.exists(json_path): return "Event file not found", 404 with open(json_path, "r") as f: data = json.load(f) df = pd.DataFrame(data) xlsx_path = os.path.join(TMP_DIR, file.replace(".json", ".xlsx")) df.to_excel(xlsx_path, index=False) return send_file(xlsx_path, as_attachment=True) if __name__ == '__main__': app.run(host="0.0.0.0", port=7860)