Spaces:
Sleeping
Sleeping
import os | |
from datetime import datetime | |
import subprocess | |
def run_hero_reranking(user_id, end_date):
    """Run the retrieval and reranking stages for one user/date run.

    All paths live under ``outputs/{user_id}_{end_date}``. The ``hero``
    sub-directory is created if missing. Each stage is launched as a
    ``python3.12`` subprocess, and only when its output file does not
    already exist, so a finished stage is not re-run.

    Args:
        user_id: Identifier interpolated into the run directory name.
        end_date: Date string interpolated into the run directory name.

    Returns:
        A dict with the keys ``"hyde"``, ``"retrieved"`` and ``"reranked"``
        mapping to the corresponding JSON file paths. The ``"hyde"`` file is
        only consumed here (passed as ``--target_data`` to retrieval); this
        function does not create it.

    Raises:
        subprocess.CalledProcessError: A stage exited with a non-zero status.
        subprocess.TimeoutExpired: A stage exceeded its timeout.
    """
    run_root = f"outputs/{user_id}_{end_date}"
    hero_root = os.path.join(run_root, "hero")
    os.makedirs(hero_root, exist_ok=True)

    hyde_output = os.path.join(hero_root, "manifesto_icl_hyde_fc.json")
    retrieval_output = os.path.join(
        hero_root, "manifesto_icl_retrieval_top_k_QA.json"
    )
    rerank_output = os.path.join(
        hero_root, "manifesto_icl_reranking_top_k_QA.json"
    )
    knowledge_store_dir = os.path.join(run_root, "augmented_data_store")

    def _launch(argv, timeout=600):
        """Echo and run *argv*; log and re-raise on failure or timeout."""
        command_line = " ".join(argv)
        print(f"Running: {command_line}")
        try:
            subprocess.run(argv, check=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            print(f"[β TIMEOUT] Command timed out: {command_line}")
            raise
        except subprocess.CalledProcessError as failure:
            print(f"[β ERROR] Subprocess failed: {failure}")
            # NOTE(review): output is not captured by the run() call above,
            # so failure.stderr is expected to be None and this branch is
            # effectively inert; the child's stderr goes straight to the
            # console instead.
            if failure.stderr:
                print("[stderr]:", failure.stderr.decode())
            raise

    # (banner, output file that marks the stage as done, command to run)
    stages = (
        (
            # Step 3.2: retrieval
            "π Step 3.2: Retrieval from knowledge store ...",
            retrieval_output,
            [
                "python3.12", "baseline/retrieval_optimized.py",
                "--knowledge_store_dir", knowledge_store_dir,
                "--target_data", hyde_output,
                "--json_output", retrieval_output,
            ],
        ),
        (
            # Step 3.3: reranking
            "π·οΈ Step 3.3: Reranking retrieved evidence ...",
            rerank_output,
            [
                "python3.12", "baseline/reranking_optimized.py",
                "--target_data", retrieval_output,
                "--json_output", rerank_output,
            ],
        ),
    )

    for banner, artifact, argv in stages:
        # The banner is printed even when the stage itself is skipped.
        print(banner)
        if os.path.exists(artifact):
            continue
        _launch(argv)

    return {
        "hyde": hyde_output,
        "retrieved": retrieval_output,
        "reranked": rerank_output,
    }
if __name__ == "__main__":
    # Script entry point: run both stages for a placeholder user and list
    # the resulting file paths. The previous call target,
    # run_step3_hero_pipeline, is not defined in this file and raised
    # NameError; run_hero_reranking is the function that returns this dict.
    output_files = run_hero_reranking(user_id="xxx", end_date="20250604")
    for key, path in output_files.items():
        print(f"β {key}: {path}")