Abdllh Ammar-alhaj-ali committed on
Commit
30b3abe
·
0 Parent(s):

Duplicate from Ammar-alhaj-ali/LayoutLMv3-FUNSD

Browse files

Co-authored-by: Ammar Alhaj Ali <Ammar-alhaj-ali@users.noreply.huggingface.co>

Files changed (8) hide show
  1. requirements.txt +7 -0
  2. .gitattributes +31 -0
  3. README.md +13 -0
  4. app.py +111 -0
  5. img1.png +0 -0
  6. img2.png +0 -0
  7. img3.png +0 -0
  8. packages.txt +6 -0
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ PyYAML==6.0
2
+ git+https://github.com/huggingface/transformers.git
3
+ pytesseract==0.3.9
4
+ datasets==2.2.2
5
+ seqeval==1.2.2
6
+
7
+
.gitattributes ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: LayoutLMv3 Fine Tuning FUNSD
3
+ emoji: 📉
4
+ colorFrom: pink
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 3.2
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: Ammar-alhaj-ali/LayoutLMv3-FUNSD
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Runtime dependency bootstrap (Hugging Face Spaces workaround): install CPU
# torch, bleeding-edge transformers, and pytesseract before they are imported.
import os

for _cmd in (
    'pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu',
    'pip install -q git+https://github.com/huggingface/transformers.git',
    'pip install pytesseract',
):
    os.system(_cmd)
5
+
6
+
7
+
8
+ import gradio as gr
9
+ import numpy as np
10
+ from transformers import AutoModelForTokenClassification
11
+ from datasets.features import ClassLabel
12
+ from transformers import AutoProcessor
13
+ from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D
14
+ import torch
15
+ from datasets import load_metric
16
+ from transformers import LayoutLMv3ForTokenClassification
17
+ from transformers.data.data_collator import default_data_collator
18
+
19
+
20
+ from transformers import AutoModelForTokenClassification
21
+ from datasets import load_dataset
22
+ from PIL import Image, ImageDraw, ImageFont
23
+
24
+
25
# Load the fine-tuned LayoutLMv3 checkpoint from the Hub. apply_ocr=True makes
# the processor run Tesseract OCR itself to extract words + boxes from images.
_CHECKPOINT = "Ammar-alhaj-ali/LayoutLMv3-Fine-Tuning-FUNSD"
processor = AutoProcessor.from_pretrained(_CHECKPOINT, apply_ocr=True)
model = AutoModelForTokenClassification.from_pretrained(_CHECKPOINT)
27
+
28
# The three example images (img1-3.png) were exported once from the
# nielsr/funsd-layoutlmv3 test split and are committed to the repo.

# FUNSD BIO tag set (order fixes the model's class-id -> tag mapping) and the
# color used to draw each tag on the annotated output image.
labels = ['O', 'B-HEADER', 'I-HEADER', 'B-QUESTION', 'I-QUESTION', 'B-ANSWER', 'I-ANSWER']
id2label = dict(enumerate(labels))
label2color = {
    "B-HEADER": 'red',
    "I-HEADER": 'red',
    "B-QUESTION": 'red',
    "I-QUESTION": "red",
    "B-ANSWER": 'blue',
    "I-ANSWER": 'blue',
    "O": 'orange',
}
45
+
46
def unnormalize_box(bbox, width, height):
    """Map a LayoutLM box from the normalized 0-1000 space back to pixels.

    bbox is (x0, y0, x1, y1); x-coordinates scale by *width*, y by *height*.
    """
    dims = (width, height, width, height)
    return [dim * (coord / 1000) for coord, dim in zip(bbox, dims)]
53
+
54
+
55
def iob_to_label(label):
    """Return the BIO tag unchanged.

    Kept as an explicit hook so the display label could later be derived from
    the raw tag (e.g. stripping the B-/I- prefix) without touching callers.
    """
    return label
57
+
58
+
59
+
60
def process_image(image):
    """Run LayoutLMv3 token classification on a form image and visualize it.

    Args:
        image: PIL.Image of a scanned form. OCR (words + boxes) is performed
            by the processor itself (apply_ocr=True).

    Returns:
        A new PIL.Image with a colored rectangle and BIO tag drawn over each
        predicted word. The input image is left unmodified.
    """
    # Fix: draw on a copy — the original mutated the caller's image in place.
    image = image.copy()
    width, height = image.size

    # Encode: the processor runs OCR and tokenizes words with their boxes.
    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
    offset_mapping = encoding.pop('offset_mapping')

    # Inference only — no gradients needed (saves memory/time on CPU Spaces).
    with torch.no_grad():
        outputs = model(**encoding)

    # Per-token class ids and their (normalized) boxes.
    predictions = outputs.logits.argmax(-1).squeeze().tolist()
    token_boxes = encoding.bbox.squeeze().tolist()

    # Keep only the first sub-token of each word: sub-tokens have a non-zero
    # character offset start in offset_mapping.
    is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
    true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
    true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]

    # Draw each predicted tag over its word on the copied image.
    draw = ImageDraw.Draw(image)
    font = ImageFont.load_default()
    for prediction, box in zip(true_predictions, true_boxes):
        predicted_label = iob_to_label(prediction)
        draw.rectangle(box, outline=label2color[predicted_label])
        draw.text((box[0] + 10, box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)

    return image
90
+
91
+
92
# --- Gradio UI -------------------------------------------------------------
title = "Extracting information from FUNSD using the LayoutLMv3 "
# Fix: removed the stray period that split "in. Noisy" in the visible text.
description = "I Fine tuned LayoutLMv3 on FUNSD (Form Understanding in Noisy Scanned Documents) "

article = "<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2]"

# Sample FUNSD pages committed alongside the app.
examples = [['img1.png'], ['img2.png'], ['img3.png']]

# Enlarge both image panes so small form text stays readable.
css = """.output_image, .input_image {height: 600px !important}"""

iface = gr.Interface(
    fn=process_image,
    inputs=gr.inputs.Image(type="pil"),
    outputs=gr.outputs.Image(type="pil", label="annotated image"),
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
    analytics_enabled=True,
    enable_queue=True,
)

iface.launch(inline=False, share=False, debug=False)
img1.png ADDED
img2.png ADDED
img3.png ADDED
packages.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ffmpeg
2
+ libsm6
3
+ libxext6
4
+ libgl1
5
+ libgl1-mesa-glx
6
+ tesseract-ocr