Spaces:

A815
/

NLPHW1

Sleeping

App Files Files Community

A815 committed on Nov 9, 2024

Commit

e3355b1

1 Parent(s): 84890a4

add

Browse files

Files changed (2) hide show

app.py +62 -0
nlp4web-codebase +0 -1

app.py CHANGED Viewed

@@ -332,6 +332,68 @@ bm25_index = BM25Index.build_from_documents(
 bm25_index.save("output/bm25_index")
 from scipy.sparse._csc import csc_matrix

 bm25_index.save("output/bm25_index")
+plots_b: Dict[str, List[float]] = {
+    "X": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+    "Y": []
+}
+plots_k1: Dict[str, List[float]] = {
+    "X": [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
+    "Y": []
+}
+## YOUR_CODE_STARTS_HERE
+# Two steps should be involved:
+# Step 1. Fix k1 value to the default one 0.9,
+# go through all the candidate b values (0, 0.1, ..., 1.0),
+# and record in plots_b["Y"] the corresponding performances obtained via evaluate_map;
+# Step 2. Fix b to the best one in step 1. and do the same for k1.
+# Hint (on using the pre-requisite code):
+# - One can use the loaded sciq dataset directly (loaded in the pre-requisite code);
+# - One can build bm25_index with `BM25Index.build_from_documents`;
+# - One can use BM25Retriever to load the index and perform retrieval on the dev queries
+# (dev queries can be obtained via sciq.get_split_queries(Split.dev))
+for b in plots_b["X"]:
+  bm25_index = BM25Index.build_from_documents(
+      documents=iter(sciq.corpus),
+      ndocs=12160,
+      show_progress_bar=False,
+      k1=0.9,
+      b=b
+  )
+  bm25_index.save("output/bm25_index")
+  bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
+  rankings = {}
+  for query in sciq.get_split_queries(Split.dev):
+    ranking = bm25_retriever.retrieve(query=query.text)
+    rankings[query.query_id] = ranking
+  k1_b_map = evaluate_map(rankings, split=Split.dev)
+  plots_b["Y"].append(k1_b_map)
+best_b = plots_b["X"][np.argmax(plots_b["Y"])]
+for k1 in plots_k1["X"]:
+  bm25_index = BM25Index.build_from_documents(
+      documents=iter(sciq.corpus),
+      ndocs=12160,
+      show_progress_bar=False,
+      k1=k1,
+      b=best_b
+  )
+  bm25_index.save("output/bm25_index")
+  bm25_retriever = BM25Retriever(index_dir="output/bm25_index")
+  rankings = {}
+  for query in sciq.get_split_queries(Split.dev):
+    ranking = bm25_retriever.retrieve(query=query.text)
+    rankings[query.query_id] = ranking
+  k1_b_map = evaluate_map(rankings, split=Split.dev)
+  plots_k1["Y"].append(k1_b_map)
 from scipy.sparse._csc import csc_matrix

nlp4web-codebase DELETED Viewed

	@@ -1 +0,0 @@
1	- Subproject commit 83f9afbbf7e372c116fdd04997a96449007f861f