Wonderwize gryan-galario committed on
Commit
b01fe85
·
0 Parent(s):

Duplicate from gryan-galario/manga-ocr-demo

Browse files

Co-authored-by: Gryan Carl Galario <gryan-galario@users.noreply.huggingface.co>

Files changed (16) hide show
  1. .gitattributes +27 -0
  2. 00.jpg +0 -0
  3. 01.jpg +0 -0
  4. 02.jpg +0 -0
  5. 03.jpg +0 -0
  6. 04.jpg +0 -0
  7. 05.jpg +0 -0
  8. 06.jpg +0 -0
  9. 07.jpg +0 -0
  10. 08.jpg +0 -0
  11. 09.jpg +0 -0
  12. 10.jpg +0 -0
  13. 11.jpg +0 -0
  14. README.md +14 -0
  15. app.py +45 -0
  16. requirements.txt +10 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
00.jpg ADDED
01.jpg ADDED
02.jpg ADDED
03.jpg ADDED
04.jpg ADDED
05.jpg ADDED
06.jpg ADDED
07.jpg ADDED
08.jpg ADDED
09.jpg ADDED
10.jpg ADDED
11.jpg ADDED
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Manga Ocr Demo
3
+ emoji: 💻
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 2.8.14
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ duplicated_from: gryan-galario/manga-ocr-demo
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import jaconv
3
+ import gradio as gr
4
+ from transformers import AutoTokenizer, AutoFeatureExtractor, VisionEncoderDecoderModel
5
+ from PIL import Image
6
+ import torch
7
+
8
+
9
# The tokenizer, the encoder-decoder model and the image feature extractor
# are all loaded from the same pretrained checkpoint identifier.
_CHECKPOINT = "kha-white/manga-ocr-base"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
feature_extractor = AutoFeatureExtractor.from_pretrained(_CHECKPOINT)
14
+
15
# Sample image file names "00.jpg" through "11.jpg"; passed to the Gradio
# interface below as its `examples` argument.
examples = [f"{index:02d}.jpg" for index in range(12)]
16
+
17
def post_process(text):
    """Normalize decoded OCR text for display.

    Steps, in order:
      1. remove every whitespace character;
      2. replace the single-character ellipsis with three ASCII periods;
      3. rewrite each run of two or more middle dots / periods as the same
         number of ASCII periods;
      4. pass the result through ``jaconv.h2z`` with ASCII and digit
         conversion enabled (half-width to full-width).

    Args:
        text: Decoded model output string.

    Returns:
        The normalized string.
    """
    compact = ''.join(text.split())
    compact = compact.replace('…', '...')
    # The replacement keeps the run length, only the characters change.
    compact = re.sub('[・.]{2,}', lambda run: '.' * len(run.group()), compact)
    return jaconv.h2z(compact, ascii=True, digit=True)
23
+
24
def manga_ocr(img):
    """Run the OCR model on one image and return the recognized text.

    Args:
        img: A PIL image (the Gradio input below is declared with
            ``type="pil"``), or ``None`` when the form is submitted
            without an image.

    Returns:
        The post-processed recognized text, or an empty string when no
        image was supplied.
    """
    # Without this guard a submission with no image fails with
    # AttributeError on ``None.convert``.
    if img is None:
        return ""

    # Round-trip through grayscale: colour is discarded but the image is
    # handed to the feature extractor in RGB mode.
    img = img.convert('L').convert('RGB')
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values
    # generate() returns a batch of sequences; a single image gives one entry.
    output = model.generate(pixel_values)[0]
    text = tokenizer.decode(output, skip_special_tokens=True)
    return post_process(text)
31
+
32
# Gradio UI: a single PIL image input wired to manga_ocr, with a plain text
# output. Flagging is disabled and the sample images are offered as examples.
iface = gr.Interface(
    fn=manga_ocr,
    inputs=[gr.inputs.Image(label="Input", type="pil")],
    outputs="text",
    layout="horizontal",
    theme="huggingface",
    title="Manga OCR",
    # User-facing text: "Recognization" corrected to "Recognition".
    description="Optical Character Recognition for Japanese Texts with focus on Mangas. The model is trained by kha-white with Github link: <a href=\"https://github.com/kha-white/manga-ocr\">manga-ocr</a> while the Space App is made by me.",
    allow_flagging='never',
    examples=examples,
    article="Author: <a href=\"https://huggingface.co/gryan-galario\">Gryan Galario</a>",
)

# Start the web app.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fire
2
+ fugashi
3
+ jaconv
4
+ loguru
5
+ numpy
6
+ Pillow
7
+ pyperclip
8
+ torch>=1.0
9
+ transformers>=4.12.5
10
+ unidic_lite