KhairulAmirinUM committed on
Commit ae4c85e · 1 Parent(s): bda04c4
.idea/workspace.xml CHANGED
@@ -63,7 +63,7 @@
   <recent name="D:\Development_Web\FullStackVercel\backend\src" />
   </key>
   </component>
-  <component name="RunManager" selected="Python.hf">
+  <component name="RunManager" selected="Python.hf2">
   <configuration name="cth" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
   <module name="backend" />
   <option name="ENV_FILES" value="" />
@@ -132,8 +132,8 @@
   </configuration>
   <recent_temporary>
   <list>
-  <item itemvalue="Python.hf" />
   <item itemvalue="Python.hf2" />
+  <item itemvalue="Python.hf" />
   <item itemvalue="Python.cth" />
   </list>
   </recent_temporary>
src/hf2.py CHANGED
@@ -2,5 +2,6 @@ from transformers import BertTokenizer, BertForSequenceClassification,TextClassi
 # Load tokenizer and model from the fine-tuned directory
 model_path = 'intent_classification/TinyBERT_106_V2' # can try other checkpoints
 
-tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-tokenizer.save_pretrained(model_path)
+tokenizer = BertTokenizer.from_pretrained('huawei-noah/TinyBERT_General_4L_312D')
+tokenizer.save_pretrained(model_path)
+print('finish')
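For context, the change above re-saves the base TinyBERT tokenizer into the fine-tuned checkpoint directory so the tokenizer files sit next to the model weights. A minimal sketch of how that directory could then be consumed, using only the classes already imported at the top of hf2.py; the pipeline call and sample input are illustrative and not part of this commit:

from transformers import BertTokenizer, BertForSequenceClassification, TextClassificationPipeline

# Directory that now holds the fine-tuned weights plus the re-saved tokenizer files
model_path = 'intent_classification/TinyBERT_106_V2'

# Load the tokenizer written by hf2.py together with the fine-tuned classifier
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForSequenceClassification.from_pretrained(model_path)

# Illustrative usage only: classify one query and print the label/score dicts
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer)
print(pipe("example user query"))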
src/intent_classification/TinyBERT_106_V2/tokenizer_config.json CHANGED
@@ -47,7 +47,7 @@
   "do_lower_case": true,
   "extra_special_tokens": {},
   "mask_token": "[MASK]",
-  "model_max_length": 512,
+  "model_max_length": 1000000000000000019884624838656,
   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",