UMCU committed on
Commit
4e88cbb
·
verified ·
1 Parent(s): e76a11a

Upload 4 files

Browse files
Files changed (3) hide show
  1. app.py +94 -0
  2. poetry.lock +0 -0
  3. pyproject.toml +21 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import AutoTokenizer, pipeline

# Module-level cache mapping a model id to its loaded fill-mask pipeline,
# so each model is downloaded and initialised at most once per process.
pipeline_cache = {}
7
+
8
def get_model_choices():
    """Return the list of masked-language-model ids offered in the UI."""
    # Grouped by publisher; BigBird is parked until it is ready for use.
    umcu_models = (
        "UMCU/CardioMedRoBERTa.nl",
        "UMCU/CardioBERTa_base.nl",
        "UMCU/CardioBERTa.nl_clinical",
        "UMCU/CardioDeBERTa.nl",
        "UMCU/CardioDeBERTa.nl_clinical",
        # "UMCU/CardioBigBird_base.nl",
    )
    external_models = (
        "CLTL/MedRoBERTa.nl",
        "DTAI-KULeuven/robbert-2023-dutch-base",
        "DTAI-KULeuven/robbert-2023-dutch-large",
        "joeranbosma/dragon-bert-base-mixed-domain",
        "joeranbosma/dragon-bert-base-domain-specific",
        "joeranbosma/dragon-roberta-base-mixed-domain",
        "joeranbosma/dragon-roberta-large-mixed-domain",
        "joeranbosma/dragon-roberta-base-domain-specific",
        "joeranbosma/dragon-roberta-large-domain-specific",
        "joeranbosma/dragon-longformer-base-mixed-domain",
        "joeranbosma/dragon-longformer-large-mixed-domain",
        "joeranbosma/dragon-longformer-base-domain-specific",
        "joeranbosma/dragon-longformer-large-domain-specific",
    )
    return list(umcu_models + external_models)
31
+
32
+ # Define the prediction function with top-k parameter
33
+ def fill_masked(text: str, model_name: str, top_k: int):
34
+ """
35
+ Takes text with [MASK] tokens, a model name, and top_k, returns top predictions.
36
+ """
37
+ # Load the pipeline if not already cached
38
+ if model_name not in pipeline_cache:
39
+ pipeline_cache[model_name] = pipeline(
40
+ "fill-mask",
41
+ model=model_name
42
+ )
43
+
44
+ fill_mask = pipeline_cache[model_name]
45
+ # Get top_k predictions
46
+ # make sure the mask format is correct
47
+ # [MASK] for BERT and DeBERTa
48
+ # <mask> for BigBird, LongFormer, RoBERTa and XLM-RoBERTa
49
+ #
50
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
51
+ mask_token = tokenizer.mask_token
52
+ text = text.replace("[MASK]", mask_token)
53
+
54
+ results = fill_mask(text, top_k=top_k)
55
+
56
+ # Format results for display
57
+ formatted = []
58
+ for res in results:
59
+ formatted.append({
60
+ "sequence": res["sequence"],
61
+ "score": round(res["score"], 4),
62
+ "token": res["token_str"]
63
+ })
64
+ return formatted
65
+
66
+ # Build the Gradio interface with a slider for top-k
67
+ iface = gr.Interface(
68
+ fn=fill_masked,
69
+ inputs=[
70
+ gr.Textbox(
71
+ lines=2,
72
+ placeholder="Type text with [MASK] tokens here...",
73
+ label="Masked Text"
74
+ ),
75
+ gr.Dropdown(
76
+ choices=get_model_choices(),
77
+ value="bert-base-uncased",
78
+ label="Model"
79
+ ),
80
+ gr.Slider(
81
+ minimum=1,
82
+ maximum=20,
83
+ step=1,
84
+ value=5,
85
+ label="Top K Predictions"
86
+ )
87
+ ],
88
+ outputs=gr.JSON(label="Predictions"),
89
+ title="Masked Language Model tester",
90
+ description="Enter a sentence with [MASK] tokens, select a model, and choose how many top predictions to return."
91
+ )
92
+
93
+ if __name__ == "__main__":
94
+ iface.launch()
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "mlmtester"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = [
6
+ {name = "Bram van Es",email = "bramiozo@gmail.com"}
7
+ ]
8
+ license = {text = "gpl-3"}
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ dependencies = [
12
+ "transformers (>=4.52.4,<5.0.0)",
13
+ "gradio (>=5.34.2,<6.0.0)",
14
+ "torch (>=2.7.1,<3.0.0)",
15
+ "protobuf (>=6.31.1,<7.0.0)"
16
+ ]
17
+
18
+
19
+ [build-system]
20
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
21
+ build-backend = "poetry.core.masonry.api"