File size: 2,045 Bytes
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
5b78ed1
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
from datetime import datetime
import subprocess


def run_hero_reranking(user_id, end_date):
    """Run the retrieval and reranking steps for one user/date output folder.

    All files live under ``outputs/{user_id}_{end_date}/hero`` (created if
    missing). Each step is launched as a ``python3.12`` subprocess and is
    skipped when its output file already exists, so a re-run only redoes the
    steps whose results are missing.

    Args:
        user_id: Identifier formatted into the output directory name.
        end_date: Date component formatted into the output directory name
            as-is.

    Returns:
        A dict with the keys ``"hyde"``, ``"retrieved"`` and ``"reranked"``
        mapping to the corresponding JSON file paths. The ``"hyde"`` file is
        only passed to the retrieval script as input; this function does not
        create it (presumably an earlier pipeline step does -- confirm with
        the caller).

    Raises:
        subprocess.CalledProcessError: A step exited with a non-zero status.
        subprocess.TimeoutExpired: A step ran longer than its timeout.
    """
    base_dir = f"outputs/{user_id}_{end_date}"
    hero_dir = os.path.join(base_dir, "hero")
    os.makedirs(hero_dir, exist_ok=True)

    hyde_output = os.path.join(hero_dir, "manifesto_icl_hyde_fc.json")

    def safe_run(cmd, timeout=600):
        """Run *cmd*, log a short diagnostic on failure, and re-raise."""
        print(f"Running: {' '.join(cmd)}")
        try:
            # The child inherits this process's stdout/stderr, so its own
            # error output is already visible on the console. Nothing is
            # captured, which is why the exception carries no stderr to print.
            subprocess.run(cmd, check=True, timeout=timeout)
        except subprocess.CalledProcessError as e:
            print(f"[❌ ERROR] Subprocess failed: {e}")
            raise
        except subprocess.TimeoutExpired:
            print(f"[❌ TIMEOUT] Command timed out: {' '.join(cmd)}")
            raise

    def run_step(cmd, output_path):
        """Run *cmd* unless *output_path* already exists."""
        if os.path.exists(output_path):
            return
        try:
            safe_run(cmd)
        except BaseException:
            # The file did not exist before this step started, so anything
            # present now was left behind by the failed/interrupted run.
            # Remove it; otherwise the existence check above would treat the
            # partial file as a finished result on the next invocation.
            try:
                os.remove(output_path)
            except FileNotFoundError:
                pass  # The step failed before writing anything.
            except OSError as cleanup_err:
                # Best effort only: never mask the original failure.
                print(
                    f"[WARN] Could not remove partial output "
                    f"{output_path}: {cleanup_err}"
                )
            raise

    # Step 3.2: retrieval
    print("🔍 Step 3.2: Retrieval from knowledge store ...")
    knowledge_store_dir = os.path.join(base_dir, "augmented_data_store")
    retrieval_output = os.path.join(hero_dir, "manifesto_icl_retrieval_top_k_QA.json")
    run_step(
        [
            "python3.12", "baseline/retrieval_optimized.py",
            "--knowledge_store_dir", knowledge_store_dir,
            "--target_data", hyde_output,
            "--json_output", retrieval_output,
        ],
        retrieval_output,
    )

    # Step 3.3: reranking (consumes the retrieval output)
    print("🏷️ Step 3.3: Reranking retrieved evidence ...")
    rerank_output = os.path.join(hero_dir, "manifesto_icl_reranking_top_k_QA.json")
    run_step(
        [
            "python3.12", "baseline/reranking_optimized.py",
            "--target_data", retrieval_output,
            "--json_output", rerank_output,
        ],
        rerank_output,
    )

    return {
        "hyde": hyde_output,
        "retrieved": retrieval_output,
        "reranked": rerank_output,
    }


if __name__ == "__main__":
    # Manual smoke run with placeholder arguments. Calls the function defined
    # in this file; the previous name `run_step3_hero_pipeline` is not defined
    # here, so the script failed with NameError.
    output_files = run_hero_reranking(user_id="xxx", end_date="20250604")
    for key, path in output_files.items():
        print(f"✅ {key}: {path}")