freemt
committed on
Commit
·
b665d70
1
Parent(s):
78bc728
Update hanzidentifier opencc to handle traditional Chinese
Browse files- app-.py +0 -140
- app.py +1 -0
- litbee/__main__.py +2 -0
- litbee/app-.py +0 -140
- litbee/app.py +10 -11
- litbee/dzbee_page.py +0 -20
- litbee/ezbee_page.py +0 -129
- litbee/fetch_upload.py +28 -18
- litbee/home.py +14 -9
- litbee/info.py +8 -5
- litbee/multipage.py +18 -17
- litbee/options.py +3 -2
- litbee/settings.py +22 -3
- litbee/t2s.py +39 -0
- litbee/utils.py +2 -1
- poetry.lock +68 -1
- pylintrc +7 -0
- pyproject.toml +6 -1
- run-flake8.sh +1 -0
- tests/test_t2s.py +15 -0
app-.py
DELETED
@@ -1,140 +0,0 @@
|
|
1 |
-
"""Prep __main__.py.
|
2 |
-
|
3 |
-
https://share.streamlit.io/deploy
|
4 |
-
Advanced settings...
|
5 |
-
Python version
|
6 |
-
3.7
|
7 |
-
3.8
|
8 |
-
3.9*
|
9 |
-
3.10
|
10 |
-
|
11 |
-
https://docs.streamlit.io/knowledge-base/using-streamlit/hide-row-indices-displaying-dataframe
|
12 |
-
Hide row indices when displaying a dataframe
|
13 |
-
# CSS to inject contained in a string
|
14 |
-
hide_table_row_index = '''
|
15 |
-
<style>
|
16 |
-
tbody th {display:none}
|
17 |
-
.blank {display:none}
|
18 |
-
</style>
|
19 |
-
'''
|
20 |
-
# Inject CSS with Markdown
|
21 |
-
st.markdown(hide_table_row_index, unsafe_allow_html=True)
|
22 |
-
|
23 |
-
# Display a static table
|
24 |
-
st.table(df)
|
25 |
-
|
26 |
-
# Hide row indices with st.dataframe
|
27 |
-
# CSS to inject contained in a string
|
28 |
-
hide_dataframe_row_index = '''
|
29 |
-
<style>
|
30 |
-
.row_heading.level0 {display:none}
|
31 |
-
.blank {display:none}
|
32 |
-
</style>
|
33 |
-
'''
|
34 |
-
# Inject CSS with Markdown
|
35 |
-
st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)
|
36 |
-
|
37 |
-
# Display an interactive table
|
38 |
-
st.dataframe(df)
|
39 |
-
|
40 |
-
https://medium.com/@avra42/streamlit-python-cool-tricks-to-make-your-web-application-look-better-8abfc3763a5b
|
41 |
-
hide_menu_style = '''
|
42 |
-
<style>
|
43 |
-
#MainMenu {visibility: hidden; }
|
44 |
-
footer {visibility: hidden;}
|
45 |
-
</style>
|
46 |
-
'''
|
47 |
-
st.markdown(hide_menu_style, unsafe_allow_html=True)
|
48 |
-
|
49 |
-
"""
|
50 |
-
# pylint: disable=invalid-name
|
51 |
-
import os
|
52 |
-
import sys
|
53 |
-
import time
|
54 |
-
from pathlib import Path
|
55 |
-
from types import SimpleNamespace
|
56 |
-
from typing import Optional
|
57 |
-
|
58 |
-
import loguru
|
59 |
-
import logzero
|
60 |
-
import pandas as pd
|
61 |
-
import streamlit as st
|
62 |
-
from loguru import logger as loggu
|
63 |
-
from logzero import logger
|
64 |
-
from set_loglevel import set_loglevel
|
65 |
-
from streamlit import session_state as state
|
66 |
-
|
67 |
-
from litbee import __version__, litbee
|
68 |
-
from litbee.options import options
|
69 |
-
|
70 |
-
# from litbee.files2df import files2df
|
71 |
-
# from litbee.utils import sb_front_cover, instructions, menu_items
|
72 |
-
# from litbee.ezbee_page import ezbee_page
|
73 |
-
# from litbee.dzbee_page import dzbee_page
|
74 |
-
# from litbee.xbee_page import xbee_page
|
75 |
-
from litbee.utils import menu_items
|
76 |
-
|
77 |
-
# from ezbee import ezbee
|
78 |
-
|
79 |
-
curr_py = sys.version[:3]
|
80 |
-
msg = f"Some packages litbee depends on can only run with Python 3.8, current python is {curr_py}, sorry..."
|
81 |
-
assert curr_py == "3.8", msg
|
82 |
-
|
83 |
-
os.environ["TZ"] = "Asia/Shanghai"
|
84 |
-
time.tzset()
|
85 |
-
os.environ["LOGLEVEL"] = "10" # uncomment this in dev
|
86 |
-
logzero.loglevel(set_loglevel())
|
87 |
-
|
88 |
-
loggu.remove()
|
89 |
-
_ = (
|
90 |
-
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
91 |
-
"<level>{level: <5}</level> | <level>{message}</level> "
|
92 |
-
"<cyan>{name}</cyan>:<cyan>{line}</cyan>"
|
93 |
-
)
|
94 |
-
loggu.add(
|
95 |
-
sys.stderr,
|
96 |
-
format=_,
|
97 |
-
level=set_loglevel(),
|
98 |
-
colorize=True,
|
99 |
-
)
|
100 |
-
|
101 |
-
# from PIL import Image
|
102 |
-
# page_icon=Image.open("icon.ico"),
|
103 |
-
st.set_page_config(
|
104 |
-
page_title=f"litbee v{__version__}",
|
105 |
-
# page_icon="🧊",
|
106 |
-
page_icon="🐝",
|
107 |
-
# layout="wide",
|
108 |
-
initial_sidebar_state="auto", # "auto" or "expanded" or "collapsed",
|
109 |
-
menu_items=menu_items,
|
110 |
-
)
|
111 |
-
|
112 |
-
# pd.set_option("precision", 2)
|
113 |
-
pd.set_option("display.precision", 2)
|
114 |
-
pd.options.display.float_format = "{:,.2f}".format
|
115 |
-
|
116 |
-
_ = dict(
|
117 |
-
beetype="ezbee",
|
118 |
-
src_filename="",
|
119 |
-
tgt_filename="",
|
120 |
-
src_fileio=b"",
|
121 |
-
tgt_fileio=b"",
|
122 |
-
src_file="",
|
123 |
-
tgt_file="",
|
124 |
-
list1=[""],
|
125 |
-
list2=[""],
|
126 |
-
df=None,
|
127 |
-
df_a=None,
|
128 |
-
df_s_a=None,
|
129 |
-
)
|
130 |
-
if "ns" not in state:
|
131 |
-
state.ns = SimpleNamespace(**_)
|
132 |
-
state.ns.list = [*_]
|
133 |
-
|
134 |
-
|
135 |
-
def main():
|
136 |
-
"""Bootstrap."""
|
137 |
-
options()
|
138 |
-
|
139 |
-
|
140 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -135,6 +135,7 @@ _ = dict(
|
|
135 |
beetype="ezbee",
|
136 |
sourcetype="upload",
|
137 |
sourcecount=2,
|
|
|
138 |
src_filename="",
|
139 |
tgt_filename="",
|
140 |
src_fileio=b"",
|
|
|
135 |
beetype="ezbee",
|
136 |
sourcetype="upload",
|
137 |
sourcecount=2,
|
138 |
+
sent_ali=None,
|
139 |
src_filename="",
|
140 |
tgt_filename="",
|
141 |
src_fileio=b"",
|
litbee/__main__.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
"""Run streamlit run app.py from __main__.py."""
|
|
|
2 |
import sys
|
|
|
3 |
from streamlit import cli
|
4 |
|
5 |
sys.argv = ["streamlit", "run", "app.py"]
|
|
|
1 |
"""Run streamlit run app.py from __main__.py."""
|
2 |
+
# pylint: disable=no-value-for-parameter
|
3 |
import sys
|
4 |
+
|
5 |
from streamlit import cli
|
6 |
|
7 |
sys.argv = ["streamlit", "run", "app.py"]
|
litbee/app-.py
DELETED
@@ -1,140 +0,0 @@
|
|
1 |
-
"""Prep __main__.py.
|
2 |
-
|
3 |
-
https://share.streamlit.io/deploy
|
4 |
-
Advanced settings...
|
5 |
-
Python version
|
6 |
-
3.7
|
7 |
-
3.8
|
8 |
-
3.9*
|
9 |
-
3.10
|
10 |
-
|
11 |
-
https://docs.streamlit.io/knowledge-base/using-streamlit/hide-row-indices-displaying-dataframe
|
12 |
-
Hide row indices when displaying a dataframe
|
13 |
-
# CSS to inject contained in a string
|
14 |
-
hide_table_row_index = '''
|
15 |
-
<style>
|
16 |
-
tbody th {display:none}
|
17 |
-
.blank {display:none}
|
18 |
-
</style>
|
19 |
-
'''
|
20 |
-
# Inject CSS with Markdown
|
21 |
-
st.markdown(hide_table_row_index, unsafe_allow_html=True)
|
22 |
-
|
23 |
-
# Display a static table
|
24 |
-
st.table(df)
|
25 |
-
|
26 |
-
# Hide row indices with st.dataframe
|
27 |
-
# CSS to inject contained in a string
|
28 |
-
hide_dataframe_row_index = '''
|
29 |
-
<style>
|
30 |
-
.row_heading.level0 {display:none}
|
31 |
-
.blank {display:none}
|
32 |
-
</style>
|
33 |
-
'''
|
34 |
-
# Inject CSS with Markdown
|
35 |
-
st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)
|
36 |
-
|
37 |
-
# Display an interactive table
|
38 |
-
st.dataframe(df)
|
39 |
-
|
40 |
-
https://medium.com/@avra42/streamlit-python-cool-tricks-to-make-your-web-application-look-better-8abfc3763a5b
|
41 |
-
hide_menu_style = '''
|
42 |
-
<style>
|
43 |
-
#MainMenu {visibility: hidden; }
|
44 |
-
footer {visibility: hidden;}
|
45 |
-
</style>
|
46 |
-
'''
|
47 |
-
st.markdown(hide_menu_style, unsafe_allow_html=True)
|
48 |
-
|
49 |
-
"""
|
50 |
-
# pylint: disable=invalid-name
|
51 |
-
import os
|
52 |
-
import sys
|
53 |
-
import time
|
54 |
-
from pathlib import Path
|
55 |
-
from types import SimpleNamespace
|
56 |
-
from typing import Optional
|
57 |
-
|
58 |
-
import loguru
|
59 |
-
import logzero
|
60 |
-
import pandas as pd
|
61 |
-
import streamlit as st
|
62 |
-
from loguru import logger as loggu
|
63 |
-
from logzero import logger
|
64 |
-
from set_loglevel import set_loglevel
|
65 |
-
from streamlit import session_state as state
|
66 |
-
|
67 |
-
from litbee import __version__, litbee
|
68 |
-
from litbee.options import options
|
69 |
-
|
70 |
-
# from litbee.files2df import files2df
|
71 |
-
# from litbee.utils import sb_front_cover, instructions, menu_items
|
72 |
-
# from litbee.ezbee_page import ezbee_page
|
73 |
-
# from litbee.dzbee_page import dzbee_page
|
74 |
-
# from litbee.xbee_page import xbee_page
|
75 |
-
from litbee.utils import menu_items
|
76 |
-
|
77 |
-
# from ezbee import ezbee
|
78 |
-
|
79 |
-
curr_py = sys.version[:3]
|
80 |
-
msg = f"Some packages litbee depends on can only run with Python 3.8, current python is {curr_py}, sorry..."
|
81 |
-
assert curr_py == "3.8", msg
|
82 |
-
|
83 |
-
os.environ["TZ"] = "Asia/Shanghai"
|
84 |
-
time.tzset()
|
85 |
-
os.environ["LOGLEVEL"] = "10" # uncomment this in dev
|
86 |
-
logzero.loglevel(set_loglevel())
|
87 |
-
|
88 |
-
loggu.remove()
|
89 |
-
_ = (
|
90 |
-
"<green>{time:YY-MM-DD HH:mm:ss}</green> | "
|
91 |
-
"<level>{level: <5}</level> | <level>{message}</level> "
|
92 |
-
"<cyan>{name}</cyan>:<cyan>{line}</cyan>"
|
93 |
-
)
|
94 |
-
loggu.add(
|
95 |
-
sys.stderr,
|
96 |
-
format=_,
|
97 |
-
level=set_loglevel(),
|
98 |
-
colorize=True,
|
99 |
-
)
|
100 |
-
|
101 |
-
# from PIL import Image
|
102 |
-
# page_icon=Image.open("icon.ico"),
|
103 |
-
st.set_page_config( # type: ignore
|
104 |
-
page_title=f"litbee v{__version__}",
|
105 |
-
# page_icon="🧊",
|
106 |
-
page_icon="🐝",
|
107 |
-
# layout="wide",
|
108 |
-
initial_sidebar_state="auto", # "auto" or "expanded" or "collapsed",
|
109 |
-
menu_items=menu_items,
|
110 |
-
)
|
111 |
-
|
112 |
-
# pd.set_option("precision", 2)
|
113 |
-
pd.set_option("display.precision", 2)
|
114 |
-
pd.options.display.float_format = "{:,.2f}".format
|
115 |
-
|
116 |
-
_ = dict(
|
117 |
-
beetype="ezbee",
|
118 |
-
src_filename="",
|
119 |
-
tgt_filename="",
|
120 |
-
src_fileio=b"",
|
121 |
-
tgt_fileio=b"",
|
122 |
-
src_file="",
|
123 |
-
tgt_file="",
|
124 |
-
list1=[""],
|
125 |
-
list2=[""],
|
126 |
-
df=None,
|
127 |
-
df_a=None,
|
128 |
-
df_s_a=None,
|
129 |
-
)
|
130 |
-
if "ns" not in state:
|
131 |
-
state.ns = SimpleNamespace(**_)
|
132 |
-
state.ns.list = [*_]
|
133 |
-
|
134 |
-
|
135 |
-
def main():
|
136 |
-
"""Bootstrap."""
|
137 |
-
options()
|
138 |
-
|
139 |
-
|
140 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
litbee/app.py
CHANGED
@@ -55,13 +55,12 @@ from pathlib import Path
|
|
55 |
from types import SimpleNamespace
|
56 |
from typing import Optional
|
57 |
|
|
|
|
|
|
|
58 |
import loguru
|
59 |
import logzero
|
60 |
import pandas as pd
|
61 |
-
import ezbee
|
62 |
-
import dzbee
|
63 |
-
import debee
|
64 |
-
|
65 |
import streamlit as st
|
66 |
from loguru import logger as loggu
|
67 |
from logzero import logger
|
@@ -69,25 +68,25 @@ from set_loglevel import set_loglevel
|
|
69 |
from streamlit import session_state as state
|
70 |
|
71 |
from litbee import __version__
|
72 |
-
|
|
|
|
|
|
|
73 |
|
74 |
# from litbee.files2df import files2df
|
75 |
# from litbee.utils import sb_front_cover, instructions, menu_items
|
76 |
# from litbee.ezbee_page import ezbee_page
|
77 |
# from litbee.dzbee_page import dzbee_page
|
78 |
# from litbee.xbee_page import xbee_page
|
79 |
-
from litbee.utils import menu_items
|
|
|
|
|
80 |
|
81 |
-
from litbee.multipage import Multipage
|
82 |
|
83 |
# from litbee.fetch_upload import fetch_upload
|
84 |
# from litbee.fetch_paste import fetch_paste
|
85 |
# from litbee.fetch_urls import fetch_urls
|
86 |
|
87 |
-
from litbee.home import home
|
88 |
-
from litbee.settings import settings
|
89 |
-
from litbee.info import info
|
90 |
-
from litbee.utils import style_css
|
91 |
|
92 |
# from ezbee import ezbee
|
93 |
|
|
|
55 |
from types import SimpleNamespace
|
56 |
from typing import Optional
|
57 |
|
58 |
+
import debee
|
59 |
+
import dzbee
|
60 |
+
import ezbee
|
61 |
import loguru
|
62 |
import logzero
|
63 |
import pandas as pd
|
|
|
|
|
|
|
|
|
64 |
import streamlit as st
|
65 |
from loguru import logger as loggu
|
66 |
from logzero import logger
|
|
|
68 |
from streamlit import session_state as state
|
69 |
|
70 |
from litbee import __version__
|
71 |
+
from litbee.home import home
|
72 |
+
from litbee.info import info
|
73 |
+
from litbee.multipage import Multipage
|
74 |
+
from litbee.settings import settings
|
75 |
|
76 |
# from litbee.files2df import files2df
|
77 |
# from litbee.utils import sb_front_cover, instructions, menu_items
|
78 |
# from litbee.ezbee_page import ezbee_page
|
79 |
# from litbee.dzbee_page import dzbee_page
|
80 |
# from litbee.xbee_page import xbee_page
|
81 |
+
from litbee.utils import menu_items, style_css
|
82 |
+
|
83 |
+
# from litbee.options import options
|
84 |
|
|
|
85 |
|
86 |
# from litbee.fetch_upload import fetch_upload
|
87 |
# from litbee.fetch_paste import fetch_paste
|
88 |
# from litbee.fetch_urls import fetch_urls
|
89 |
|
|
|
|
|
|
|
|
|
90 |
|
91 |
# from ezbee import ezbee
|
92 |
|
litbee/dzbee_page.py
DELETED
@@ -1,20 +0,0 @@
|
|
1 |
-
"""Display dzbee page."""
|
2 |
-
import pandas as pd
|
3 |
-
import streamlit as st
|
4 |
-
from loguru import logger as loggu
|
5 |
-
from logzero import logger
|
6 |
-
|
7 |
-
|
8 |
-
def dzbee_page():
|
9 |
-
"""Display dzbee page."""
|
10 |
-
# st.title('dzbee')
|
11 |
-
# st.write('Welcome to app1')
|
12 |
-
|
13 |
-
try:
|
14 |
-
df = st.session_state.ns.df
|
15 |
-
except Exception as exc:
|
16 |
-
logger.error(exc)
|
17 |
-
df = pd.DataFrame([[""]])
|
18 |
-
|
19 |
-
loggu.debug(" df ")
|
20 |
-
st.table(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
litbee/ezbee_page.py
DELETED
@@ -1,129 +0,0 @@
|
|
1 |
-
"""Display ezbee page."""
|
2 |
-
from functools import partial
|
3 |
-
|
4 |
-
import logzero
|
5 |
-
import pandas as pd
|
6 |
-
import streamlit as st
|
7 |
-
from ezbee import ezbee
|
8 |
-
from ezbee.gen_pairs import gen_pairs
|
9 |
-
from loguru import logger as loggu
|
10 |
-
from logzero import logger
|
11 |
-
from set_loglevel import set_loglevel
|
12 |
-
from st_aggrid import AgGrid
|
13 |
-
from st_aggrid.grid_options_builder import GridOptionsBuilder
|
14 |
-
from streamlit import session_state as state
|
15 |
-
|
16 |
-
logzero.loglevel(set_loglevel())
|
17 |
-
|
18 |
-
|
19 |
-
def st_radio_horizontal(*args, **kwargs):
|
20 |
-
"""Trick to have horizontal st radio to simulate tabs."""
|
21 |
-
col, _ = st.columns(2)
|
22 |
-
with col:
|
23 |
-
# st.write('<style> div[data-testid=column] > div > div > div > div.stRadio > div{flex-direction: row;}</style>', unsafe_allow_html=True)
|
24 |
-
# return st.radio(*args, **kwargs)
|
25 |
-
st.write(
|
26 |
-
"<style> div[data-testid=stSidebar] > div > div > div > div > div > div > div.stRadio > div{flex-direction: row;}</style>",
|
27 |
-
unsafe_allow_html=True,
|
28 |
-
)
|
29 |
-
return st.sidebar.radio(*args, **kwargs)
|
30 |
-
|
31 |
-
|
32 |
-
def ezbee_page():
|
33 |
-
"""Display ezbee page."""
|
34 |
-
# st.title('ezbee')
|
35 |
-
# st.write('### ezbee')
|
36 |
-
# st.write('Welcome to app1')
|
37 |
-
|
38 |
-
_ = """
|
39 |
-
try:
|
40 |
-
df = st.session_state.ns.df
|
41 |
-
except Exception as exc:
|
42 |
-
logger.error(exc)
|
43 |
-
df = pd.DataFrame([[""]])
|
44 |
-
# """
|
45 |
-
|
46 |
-
# st.table(df) # looks alright
|
47 |
-
|
48 |
-
# stlyed pd dataframe?
|
49 |
-
# bigger, no pagination
|
50 |
-
# st.markdown(df.to_html(), unsafe_allow_html=True)
|
51 |
-
|
52 |
-
# ag_grid smallish, editable, probably slower
|
53 |
-
|
54 |
-
if "df" not in globals():
|
55 |
-
logger.debug(" df not defined, return")
|
56 |
-
return None
|
57 |
-
|
58 |
-
df = pd.DataFrame([["", "", ""]], columns=["text1", "text2", "llh"])
|
59 |
-
|
60 |
-
df_exp = st.expander("to be aligned", expanded=False)
|
61 |
-
with df_exp:
|
62 |
-
st.write(df) # too small
|
63 |
-
|
64 |
-
_ = """
|
65 |
-
ag_exp = st.expander("done aligned") # , expanded=False
|
66 |
-
with ag_exp:
|
67 |
-
agdf = AgGrid(
|
68 |
-
df,
|
69 |
-
# fit_columns_on_grid_load=True,
|
70 |
-
editable=True,
|
71 |
-
gridOptions=gridOptions,
|
72 |
-
key="ag_exp",
|
73 |
-
)
|
74 |
-
# """
|
75 |
-
|
76 |
-
list1 = [elm.strip() for elm in df.text1 if elm.strip()]
|
77 |
-
list2 = [elm.strip() for elm in df.text2 if elm.strip()]
|
78 |
-
logger.info("Processing data...")
|
79 |
-
try:
|
80 |
-
aset = ezbee(
|
81 |
-
list1,
|
82 |
-
list2,
|
83 |
-
# eps=eps,
|
84 |
-
# min_samples=min_samples,
|
85 |
-
)
|
86 |
-
except Exception as e:
|
87 |
-
logger.error("aset = ezbee(...) exc: %s", e)
|
88 |
-
aset = ""
|
89 |
-
return None
|
90 |
-
|
91 |
-
# fastlid changed logger.level is changed to 20
|
92 |
-
# turn back to loglevel
|
93 |
-
logzero.loglevel(set_loglevel())
|
94 |
-
if aset:
|
95 |
-
logger.debug("aset: %s...%s", aset[:3], aset[-3:])
|
96 |
-
|
97 |
-
# st.write(aset)
|
98 |
-
|
99 |
-
aligned_pairs = gen_pairs(list1, list2, aset)
|
100 |
-
if aligned_pairs:
|
101 |
-
logger.debug("%s...%s", aligned_pairs[:3], aligned_pairs[-3:])
|
102 |
-
|
103 |
-
df_a = pd.DataFrame(aligned_pairs, columns=["text1", "text2", "llh"])
|
104 |
-
|
105 |
-
# insert seq no
|
106 |
-
df_a.insert(0, "sn", range(len(df_a)))
|
107 |
-
|
108 |
-
gb = GridOptionsBuilder.from_dataframe(df_a)
|
109 |
-
gb.configure_pagination(paginationAutoPageSize=True)
|
110 |
-
options = {
|
111 |
-
"resizable": True,
|
112 |
-
"autoHeight": True,
|
113 |
-
"wrapText": True,
|
114 |
-
"editable": True,
|
115 |
-
}
|
116 |
-
gb.configure_default_column(**options)
|
117 |
-
gridOptions = gb.build()
|
118 |
-
|
119 |
-
st.write("aligned (double-click a cell to edit)")
|
120 |
-
agdf = AgGrid(
|
121 |
-
# df,
|
122 |
-
df_a,
|
123 |
-
gridOptions=gridOptions,
|
124 |
-
key="outside",
|
125 |
-
editable=True,
|
126 |
-
width="100%",
|
127 |
-
height=500,
|
128 |
-
# fit_columns_on_grid_load=True,
|
129 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
litbee/fetch_upload.py
CHANGED
@@ -3,31 +3,33 @@
|
|
3 |
org ezbee_page.py.
|
4 |
"""
|
5 |
# pylint: disable=invalid-name
|
6 |
-
|
7 |
import inspect
|
|
|
|
|
8 |
from itertools import zip_longest
|
9 |
from time import perf_counter
|
10 |
|
|
|
11 |
import logzero
|
12 |
import numpy as np
|
13 |
import pandas as pd
|
14 |
import streamlit as st
|
15 |
-
from dzbee import dzbee # noqa
|
16 |
-
from ezbee import ezbee # noqa
|
17 |
-
from debee import debee # noqa
|
18 |
|
19 |
-
# from ezbee.gen_pairs import gen_pairs # aset2pairs?
|
20 |
from aset2pairs import aset2pairs
|
|
|
|
|
|
|
21 |
from fastlid import fastlid
|
22 |
from icecream import ic
|
23 |
from loguru import logger as loggu
|
24 |
from logzero import logger
|
25 |
from set_loglevel import set_loglevel
|
26 |
-
from st_aggrid import AgGrid,
|
27 |
-
|
28 |
from streamlit import session_state as state
|
29 |
|
30 |
-
|
31 |
|
32 |
|
33 |
def fetch_upload(): # noqa
|
@@ -43,7 +45,7 @@ def fetch_upload(): # noqa
|
|
43 |
return None
|
44 |
|
45 |
# src_fileio tgt_fileio
|
46 |
-
with st.form(key=
|
47 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
48 |
with _:
|
49 |
col1, col2 = st.columns(2)
|
@@ -67,7 +69,7 @@ def fetch_upload(): # noqa
|
|
67 |
key="tgt_text",
|
68 |
# accept_multiple_files=True,
|
69 |
)
|
70 |
-
submitted = st.form_submit_button(
|
71 |
|
72 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
73 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
@@ -127,10 +129,12 @@ def fetch_upload(): # noqa
|
|
127 |
if not (filename1 or filename2):
|
128 |
st.write("| no file uploaded")
|
129 |
return None
|
130 |
-
|
|
|
131 |
st.write("| file1 not ready")
|
132 |
return None
|
133 |
-
|
|
|
134 |
st.write("| file2 not ready")
|
135 |
return None
|
136 |
|
@@ -216,16 +220,18 @@ def fetch_upload(): # noqa
|
|
216 |
# logger.debug("fn.__doc__: %s", fn.__doc__)
|
217 |
logger.debug("fn.__name__: %s", fn.__name__)
|
218 |
|
219 |
-
from inspect import getabsfile
|
220 |
-
logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
|
|
|
|
221 |
|
222 |
with st.spinner(" diggin..."):
|
223 |
then = perf_counter()
|
224 |
try:
|
225 |
# aset = ezbee/dzbee/debee
|
226 |
aset = globals()[state.ns.beetype](
|
227 |
-
list1,
|
228 |
-
list2,
|
229 |
# eps=eps,
|
230 |
# min_samples=min_samples,
|
231 |
)
|
@@ -267,7 +273,9 @@ def fetch_upload(): # noqa
|
|
267 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
268 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
269 |
|
270 |
-
df_a = pd.DataFrame(
|
|
|
|
|
271 |
|
272 |
# if set_loglevel() <= 10:
|
273 |
_ = st.expander("done aligned")
|
@@ -301,5 +309,7 @@ def fetch_upload(): # noqa
|
|
301 |
# width="100%", # width parameter is deprecated
|
302 |
height=750,
|
303 |
# fit_columns_on_grid_load=True,
|
304 |
-
update_mode=GridUpdateMode.MODEL_CHANGED
|
305 |
)
|
|
|
|
|
|
3 |
org ezbee_page.py.
|
4 |
"""
|
5 |
# pylint: disable=invalid-name
|
6 |
+
# pylint: disable=too-many-return-statements,too-many-branches,too-many-statements, too-many-locals
|
7 |
import inspect
|
8 |
+
|
9 |
+
from functools import partial
|
10 |
from itertools import zip_longest
|
11 |
from time import perf_counter
|
12 |
|
13 |
+
import hanzidentifier
|
14 |
import logzero
|
15 |
import numpy as np
|
16 |
import pandas as pd
|
17 |
import streamlit as st
|
|
|
|
|
|
|
18 |
|
|
|
19 |
from aset2pairs import aset2pairs
|
20 |
+
from debee import debee # noqa
|
21 |
+
from dzbee import dzbee # noqa
|
22 |
+
from ezbee import ezbee # noqa
|
23 |
from fastlid import fastlid
|
24 |
from icecream import ic
|
25 |
from loguru import logger as loggu
|
26 |
from logzero import logger
|
27 |
from set_loglevel import set_loglevel
|
28 |
+
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
29 |
+
|
30 |
from streamlit import session_state as state
|
31 |
|
32 |
+
from litbee.t2s import t2s
|
33 |
|
34 |
|
35 |
def fetch_upload(): # noqa
|
|
|
45 |
return None
|
46 |
|
47 |
# src_fileio tgt_fileio
|
48 |
+
with st.form(key="upload_in_form"):
|
49 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
50 |
with _:
|
51 |
col1, col2 = st.columns(2)
|
|
|
69 |
key="tgt_text",
|
70 |
# accept_multiple_files=True,
|
71 |
)
|
72 |
+
submitted = st.form_submit_button("Submit")
|
73 |
|
74 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
75 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
|
|
129 |
if not (filename1 or filename2):
|
130 |
st.write("| no file uploaded")
|
131 |
return None
|
132 |
+
|
133 |
+
if not filename1:
|
134 |
st.write("| file1 not ready")
|
135 |
return None
|
136 |
+
|
137 |
+
if not filename2:
|
138 |
st.write("| file2 not ready")
|
139 |
return None
|
140 |
|
|
|
220 |
# logger.debug("fn.__doc__: %s", fn.__doc__)
|
221 |
logger.debug("fn.__name__: %s", fn.__name__)
|
222 |
|
223 |
+
# from inspect import getabsfile
|
224 |
+
# logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
225 |
+
|
226 |
+
# convert to simplified chinese if is_tranditional
|
227 |
|
228 |
with st.spinner(" diggin..."):
|
229 |
then = perf_counter()
|
230 |
try:
|
231 |
# aset = ezbee/dzbee/debee
|
232 |
aset = globals()[state.ns.beetype](
|
233 |
+
t2s(list1), # t2s, handle trand.chinese
|
234 |
+
t2s(list2),
|
235 |
# eps=eps,
|
236 |
# min_samples=min_samples,
|
237 |
)
|
|
|
273 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
274 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
275 |
|
276 |
+
df_a = pd.DataFrame(
|
277 |
+
aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
|
278 |
+
)
|
279 |
|
280 |
# if set_loglevel() <= 10:
|
281 |
_ = st.expander("done aligned")
|
|
|
309 |
# width="100%", # width parameter is deprecated
|
310 |
height=750,
|
311 |
# fit_columns_on_grid_load=True,
|
312 |
+
update_mode=GridUpdateMode.MODEL_CHANGED,
|
313 |
)
|
314 |
+
|
315 |
+
return None
|
litbee/home.py
CHANGED
@@ -2,9 +2,10 @@
|
|
2 |
|
3 |
org ezbee_page.py.
|
4 |
"""
|
|
|
|
|
5 |
# pylint: disable=invalid-name
|
6 |
from functools import partial
|
7 |
-
import inspect
|
8 |
from itertools import zip_longest
|
9 |
from time import perf_counter
|
10 |
|
@@ -12,18 +13,19 @@ import logzero
|
|
12 |
import numpy as np
|
13 |
import pandas as pd
|
14 |
import streamlit as st
|
15 |
-
from dzbee import dzbee # noqa
|
16 |
-
from ezbee import ezbee # noqa
|
17 |
-
from debee import debee # noqa
|
18 |
|
19 |
# from ezbee.gen_pairs import gen_pairs # aset2pairs?
|
20 |
from aset2pairs import aset2pairs
|
|
|
|
|
|
|
21 |
from fastlid import fastlid
|
22 |
from icecream import ic
|
23 |
from loguru import logger as loggu
|
24 |
from logzero import logger
|
25 |
from set_loglevel import set_loglevel
|
26 |
-
from st_aggrid import AgGrid,
|
|
|
27 |
# from st_aggrid.grid_options_builder import GridOptionsBuilder
|
28 |
from streamlit import session_state as state
|
29 |
|
@@ -43,7 +45,7 @@ def home(): # noqa
|
|
43 |
return None
|
44 |
|
45 |
# src_fileio tgt_fileio
|
46 |
-
with st.form(key=
|
47 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
48 |
with _:
|
49 |
col1, col2 = st.columns(2)
|
@@ -67,7 +69,7 @@ def home(): # noqa
|
|
67 |
key="tgt_text",
|
68 |
# accept_multiple_files=True,
|
69 |
)
|
70 |
-
submitted = st.form_submit_button(
|
71 |
|
72 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
73 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
@@ -217,6 +219,7 @@ def home(): # noqa
|
|
217 |
logger.debug("fn.__name__: %s", fn.__name__)
|
218 |
|
219 |
from inspect import getabsfile
|
|
|
220 |
logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
221 |
|
222 |
with st.spinner(" diggin..."):
|
@@ -267,7 +270,9 @@ def home(): # noqa
|
|
267 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
268 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
269 |
|
270 |
-
df_a = pd.DataFrame(
|
|
|
|
|
271 |
|
272 |
# if set_loglevel() <= 10:
|
273 |
_ = st.expander("done aligned")
|
@@ -301,5 +306,5 @@ def home(): # noqa
|
|
301 |
# width="100%", # width parameter is deprecated
|
302 |
height=750,
|
303 |
# fit_columns_on_grid_load=True,
|
304 |
-
update_mode=GridUpdateMode.MODEL_CHANGED
|
305 |
)
|
|
|
2 |
|
3 |
org ezbee_page.py.
|
4 |
"""
|
5 |
+
import inspect
|
6 |
+
|
7 |
# pylint: disable=invalid-name
|
8 |
from functools import partial
|
|
|
9 |
from itertools import zip_longest
|
10 |
from time import perf_counter
|
11 |
|
|
|
13 |
import numpy as np
|
14 |
import pandas as pd
|
15 |
import streamlit as st
|
|
|
|
|
|
|
16 |
|
17 |
# from ezbee.gen_pairs import gen_pairs # aset2pairs?
|
18 |
from aset2pairs import aset2pairs
|
19 |
+
from debee import debee # noqa
|
20 |
+
from dzbee import dzbee # noqa
|
21 |
+
from ezbee import ezbee # noqa
|
22 |
from fastlid import fastlid
|
23 |
from icecream import ic
|
24 |
from loguru import logger as loggu
|
25 |
from logzero import logger
|
26 |
from set_loglevel import set_loglevel
|
27 |
+
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
|
28 |
+
|
29 |
# from st_aggrid.grid_options_builder import GridOptionsBuilder
|
30 |
from streamlit import session_state as state
|
31 |
|
|
|
45 |
return None
|
46 |
|
47 |
# src_fileio tgt_fileio
|
48 |
+
with st.form(key="upload_in_form"):
|
49 |
_ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
|
50 |
with _:
|
51 |
col1, col2 = st.columns(2)
|
|
|
69 |
key="tgt_text",
|
70 |
# accept_multiple_files=True,
|
71 |
)
|
72 |
+
submitted = st.form_submit_button("Submit")
|
73 |
|
74 |
# logger.debug(" len(src_fileio): %s", len(src_fileio))
|
75 |
# logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
|
|
|
219 |
logger.debug("fn.__name__: %s", fn.__name__)
|
220 |
|
221 |
from inspect import getabsfile
|
222 |
+
|
223 |
logger.debug("getabsfile(fn): %s", getabsfile(fn))
|
224 |
|
225 |
with st.spinner(" diggin..."):
|
|
|
270 |
logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
|
271 |
# logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
|
272 |
|
273 |
+
df_a = pd.DataFrame(
|
274 |
+
aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
|
275 |
+
)
|
276 |
|
277 |
# if set_loglevel() <= 10:
|
278 |
_ = st.expander("done aligned")
|
|
|
306 |
# width="100%", # width parameter is deprecated
|
307 |
height=750,
|
308 |
# fit_columns_on_grid_load=True,
|
309 |
+
update_mode=GridUpdateMode.MODEL_CHANGED,
|
310 |
)
|
litbee/info.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
"""Present info about litbee."""
|
2 |
-
import ezbee
|
3 |
-
import dzbee
|
4 |
-
import debee
|
5 |
-
|
6 |
from textwrap import dedent
|
|
|
|
|
|
|
|
|
7 |
import streamlit as st
|
|
|
8 |
from litbee import __version__
|
9 |
|
10 |
# from litbee.utils import style_css
|
@@ -23,7 +24,9 @@ msg = dedent(
|
|
23 |
<li> xbee/bumblebee: other language pairs, normal para-align
|
24 |
</ul>
|
25 |
|
26 |
-
The algorithm for fast para-align is home-brewn. Two
|
|
|
|
|
27 |
</div>
|
28 |
"""
|
29 |
).strip()
|
|
|
1 |
"""Present info about litbee."""
|
|
|
|
|
|
|
|
|
2 |
from textwrap import dedent
|
3 |
+
|
4 |
+
import debee
|
5 |
+
import dzbee
|
6 |
+
import ezbee
|
7 |
import streamlit as st
|
8 |
+
|
9 |
from litbee import __version__
|
10 |
|
11 |
# from litbee.utils import style_css
|
|
|
24 |
<li> xbee/bumblebee: other language pairs, normal para-align
|
25 |
</ul>
|
26 |
|
27 |
+
The algorithm for fast para-align is home-brewn. Two
|
28 |
+
sent-align algorithms are used: one based on Gale-Church,
|
29 |
+
the other machine learning.
|
30 |
</div>
|
31 |
"""
|
32 |
).strip()
|
litbee/multipage.py
CHANGED
@@ -13,29 +13,24 @@ class Multipage:
|
|
13 |
"""Framework for combining multiple streamlit applications."""
|
14 |
|
15 |
def __init__(self) -> None:
|
16 |
-
"""
|
17 |
self.pages = []
|
18 |
|
19 |
def add_page(self, title, icon, func) -> None:
|
20 |
-
"""Class Method to Add pages to the project
|
21 |
|
22 |
Args:
|
23 |
title ([str]): The title of page which we are adding to the list of apps
|
24 |
-
|
25 |
func: Python function to render this page in Streamlit
|
26 |
"""
|
27 |
-
|
28 |
-
self.pages.append(
|
29 |
-
{
|
30 |
-
"title": title,
|
31 |
-
"icon": icon,
|
32 |
-
"function": func
|
33 |
-
}
|
34 |
-
)
|
35 |
|
36 |
def run(self):
|
37 |
-
|
38 |
-
|
|
|
|
|
39 |
<style>
|
40 |
section[data-testid="stSidebar"] > div:first-of-type {
|
41 |
background-color: var(--secondary-background-color);
|
@@ -51,12 +46,18 @@ class Multipage:
|
|
51 |
padding: 1rem 0;
|
52 |
}
|
53 |
</style>
|
54 |
-
""",
|
|
|
|
|
55 |
|
56 |
with st.sidebar:
|
57 |
-
selected = option_menu(
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
60 |
|
61 |
# Run the selected page
|
62 |
for index, item in enumerate(self.pages):
|
|
|
13 |
"""Framework for combining multiple streamlit applications."""
|
14 |
|
15 |
def __init__(self) -> None:
|
16 |
+
"""Construct class to generate a list which will store all our applications as an instance variable."""
|
17 |
self.pages = []
|
18 |
|
19 |
def add_page(self, title, icon, func) -> None:
|
20 |
+
"""Class Method to Add pages to the project.
|
21 |
|
22 |
Args:
|
23 |
title ([str]): The title of page which we are adding to the list of apps
|
24 |
+
icon: icon from streamlit-menu-option
|
25 |
func: Python function to render this page in Streamlit
|
26 |
"""
|
27 |
+
self.pages.append({"title": title, "icon": icon, "function": func})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def run(self):
|
30 |
+
"""Dropdown to select the page to run."""
|
31 |
+
# Dropdown to select the page to run
|
32 |
+
st.markdown(
|
33 |
+
"""
|
34 |
<style>
|
35 |
section[data-testid="stSidebar"] > div:first-of-type {
|
36 |
background-color: var(--secondary-background-color);
|
|
|
46 |
padding: 1rem 0;
|
47 |
}
|
48 |
</style>
|
49 |
+
""",
|
50 |
+
unsafe_allow_html=True,
|
51 |
+
)
|
52 |
|
53 |
with st.sidebar:
|
54 |
+
selected = option_menu(
|
55 |
+
None,
|
56 |
+
[page["title"] for page in self.pages],
|
57 |
+
icons=[page["icon"] for page in self.pages],
|
58 |
+
menu_icon="cast",
|
59 |
+
default_index=0,
|
60 |
+
)
|
61 |
|
62 |
# Run the selected page
|
63 |
for index, item in enumerate(self.pages):
|
litbee/options.py
CHANGED
@@ -7,12 +7,13 @@ from loguru import logger as loggu
|
|
7 |
from logzero import logger
|
8 |
from streamlit import session_state as state
|
9 |
|
|
|
|
|
10 |
# from litbee.ezbee_page import ezbee_page
|
11 |
# from litbee.dzbee_page import dzbee_page
|
12 |
# from litbee.xbee_page import xbee_page
|
13 |
from litbee.fetch_upload import fetch_upload
|
14 |
from litbee.fetch_urls import fetch_urls
|
15 |
-
from litbee.fetch_paste import fetch_paste
|
16 |
from litbee.files2df import files2df
|
17 |
from litbee.utils import instructions, sb_front_cover
|
18 |
|
@@ -49,7 +50,7 @@ def options():
|
|
49 |
# if beetype not in ["ezbee", "dzbee"]:
|
50 |
if beetype not in ["ezbee", "dzbee", "debee"]:
|
51 |
st.write("Coming soon")
|
52 |
-
return
|
53 |
|
54 |
# multi-page setup
|
55 |
menu = {
|
|
|
7 |
from logzero import logger
|
8 |
from streamlit import session_state as state
|
9 |
|
10 |
+
from litbee.fetch_paste import fetch_paste
|
11 |
+
|
12 |
# from litbee.ezbee_page import ezbee_page
|
13 |
# from litbee.dzbee_page import dzbee_page
|
14 |
# from litbee.xbee_page import xbee_page
|
15 |
from litbee.fetch_upload import fetch_upload
|
16 |
from litbee.fetch_urls import fetch_urls
|
|
|
17 |
from litbee.files2df import files2df
|
18 |
from litbee.utils import instructions, sb_front_cover
|
19 |
|
|
|
50 |
# if beetype not in ["ezbee", "dzbee"]:
|
51 |
if beetype not in ["ezbee", "dzbee", "debee"]:
|
52 |
st.write("Coming soon")
|
53 |
+
return
|
54 |
|
55 |
# multi-page setup
|
56 |
menu = {
|
litbee/settings.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
"""Prep Settings/Options page."""
|
2 |
# pylint: disable=invalid-name
|
3 |
from functools import partial
|
|
|
4 |
import streamlit as st
|
5 |
from loguru import logger as loggu
|
6 |
from logzero import logger
|
@@ -10,7 +11,8 @@ from streamlit import session_state as state
|
|
10 |
def settings():
|
11 |
"""Prep Settings/Options page.
|
12 |
|
13 |
-
Refer to options.py
|
|
|
14 |
# horizotal radio
|
15 |
st.write(
|
16 |
"<style>div.row-widget.stRadio > div{flex-direction:row;}</style>",
|
@@ -24,6 +26,7 @@ def settings():
|
|
24 |
# col1, col2 = st.columns(2)
|
25 |
|
26 |
# with col1:
|
|
|
27 |
try:
|
28 |
index = beetype_list.index(state.ns.beetype)
|
29 |
except Exception as e:
|
@@ -34,7 +37,7 @@ def settings():
|
|
34 |
beetype_list,
|
35 |
index=index,
|
36 |
format_func=lambda x: f"{x:<7} |",
|
37 |
-
help=
|
38 |
)
|
39 |
state.ns.beetype = beetype
|
40 |
|
@@ -70,11 +73,27 @@ def settings():
|
|
70 |
)
|
71 |
state.ns.sourcecount = sourcecount
|
72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
# show state.ns[:6]
|
74 |
loggu.debug(f" state.ns.list: {state.ns.list}")
|
75 |
|
76 |
# beetype, sourcetype, sourcecount, filename1, filename2
|
77 |
-
_ = map(partial(getattr, state.ns), state.ns.list[:
|
78 |
logger.debug(" state.ns.list[:3]: %s", str([*_]))
|
79 |
|
80 |
# st.write(f"run: {state.ns.count}")
|
|
|
1 |
"""Prep Settings/Options page."""
|
2 |
# pylint: disable=invalid-name
|
3 |
from functools import partial
|
4 |
+
|
5 |
import streamlit as st
|
6 |
from loguru import logger as loggu
|
7 |
from logzero import logger
|
|
|
11 |
def settings():
|
12 |
"""Prep Settings/Options page.
|
13 |
|
14 |
+
Refer to options.py
|
15 |
+
"""
|
16 |
# horizotal radio
|
17 |
st.write(
|
18 |
"<style>div.row-widget.stRadio > div{flex-direction:row;}</style>",
|
|
|
26 |
# col1, col2 = st.columns(2)
|
27 |
|
28 |
# with col1:
|
29 |
+
_ = "ezbee: english-chinese; dzbee: german-chinese, debee: german-english; xbee: other language pairs (slow, approx.1000 pairs/3 min) | ezbee: 英/中; dzbee: 德/中, debee: 德/英; xbee: 其他语言对(慢, 约1000对/3分钟)"
|
30 |
try:
|
31 |
index = beetype_list.index(state.ns.beetype)
|
32 |
except Exception as e:
|
|
|
37 |
beetype_list,
|
38 |
index=index,
|
39 |
format_func=lambda x: f"{x:<7} |",
|
40 |
+
help=_,
|
41 |
)
|
42 |
state.ns.beetype = beetype
|
43 |
|
|
|
73 |
)
|
74 |
state.ns.sourcecount = sourcecount
|
75 |
|
76 |
+
sentali_list = [None, "fast", "slow"]
|
77 |
+
try:
|
78 |
+
index = sentali_list.index(state.ns.sentali)
|
79 |
+
except Exception as e:
|
80 |
+
logger.error("sentali index error: %s, setting to 0", e)
|
81 |
+
index = 0
|
82 |
+
sentali = st.radio(
|
83 |
+
"Sent Align",
|
84 |
+
sentali_list,
|
85 |
+
index=index,
|
86 |
+
format_func=lambda x: f"{str(x):<4} |",
|
87 |
+
help="None: no sent align; fast: gale-church; slow: machine-learning",
|
88 |
+
disabled=True,
|
89 |
+
)
|
90 |
+
state.ns.sentali = sentali
|
91 |
+
|
92 |
# show state.ns[:6]
|
93 |
loggu.debug(f" state.ns.list: {state.ns.list}")
|
94 |
|
95 |
# beetype, sourcetype, sourcecount, filename1, filename2
|
96 |
+
_ = map(partial(getattr, state.ns), state.ns.list[:6])
|
97 |
logger.debug(" state.ns.list[:3]: %s", str([*_]))
|
98 |
|
99 |
# st.write(f"run: {state.ns.count}")
|
litbee/t2s.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Convert list to simplified Chinese for traditional Chinese, do nothing otherwise."""
|
2 |
+
# pylint: disable=invalid-name
|
3 |
+
from typing import List
|
4 |
+
import hanzidentifier
|
5 |
+
from logzero import logger
|
6 |
+
from opencc import OpenCC
|
7 |
+
|
8 |
+
convert = OpenCC('t2s').convert
|
9 |
+
|
10 |
+
|
11 |
+
def t2s(lst: List[str]) -> List[str]:
|
12 |
+
"""Convert list to simplified Chinese for traditional Chinese, do nothing otherwise.
|
13 |
+
|
14 |
+
Args:
|
15 |
+
list of strings
|
16 |
+
|
17 |
+
Returns:
|
18 |
+
list of strings
|
19 |
+
"""
|
20 |
+
try:
|
21 |
+
# lst[:1000]: trim down for extremely large docs
|
22 |
+
_ = hanzidentifier.identify(" ".join(lst[:1000]))
|
23 |
+
except Exception as e:
|
24 |
+
logger.warning("hanzidentifier.is_traditional error: %s, settin to simplified.", e)
|
25 |
+
_ = hanzidentifier.SIMP # 2: simplified
|
26 |
+
|
27 |
+
if _ not in [hanzidentifier.TRAD, hanzidentifier.MIXED]:
|
28 |
+
return lst
|
29 |
+
|
30 |
+
res = []
|
31 |
+
for line in lst:
|
32 |
+
try:
|
33 |
+
_ = convert(line)
|
34 |
+
except Exception as e:
|
35 |
+
logger.warning("ts2 error: %s, setting to original", e)
|
36 |
+
_ = line
|
37 |
+
res.append(_)
|
38 |
+
|
39 |
+
return res
|
litbee/utils.py
CHANGED
@@ -40,7 +40,7 @@ msg = dedent(
|
|
40 |
|
41 |
|
42 |
def sb_front_cover():
|
43 |
-
"""Prep front cover for sidebar"""
|
44 |
st.sidebar.markdown(f"### litbee {__version__} ")
|
45 |
|
46 |
sb_tit_expander = st.sidebar.expander("More info (click to toggle)", expanded=False)
|
@@ -65,6 +65,7 @@ intructins = dedent(
|
|
65 |
|
66 |
|
67 |
def instructions():
|
|
|
68 |
logger.debug("instructions entry")
|
69 |
back_cover_expander = st.expander("Instructions")
|
70 |
with back_cover_expander:
|
|
|
40 |
|
41 |
|
42 |
def sb_front_cover():
|
43 |
+
"""Prep front cover for sidebar."""
|
44 |
st.sidebar.markdown(f"### litbee {__version__} ")
|
45 |
|
46 |
sb_tit_expander = st.sidebar.expander("More info (click to toggle)", expanded=False)
|
|
|
65 |
|
66 |
|
67 |
def instructions():
|
68 |
+
"""Prep msg."""
|
69 |
logger.debug("instructions entry")
|
70 |
back_cover_expander = st.expander("Instructions")
|
71 |
with back_cover_expander:
|
poetry.lock
CHANGED
@@ -519,6 +519,17 @@ python-versions = "*"
|
|
519 |
[package.extras]
|
520 |
test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
|
521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
522 |
[[package]]
|
523 |
name = "icecream"
|
524 |
version = "2.1.2"
|
@@ -1038,6 +1049,14 @@ category = "main"
|
|
1038 |
optional = false
|
1039 |
python-versions = ">=3.8"
|
1040 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1041 |
[[package]]
|
1042 |
name = "packaging"
|
1043 |
version = "21.3"
|
@@ -1129,6 +1148,17 @@ python-versions = ">=3.7"
|
|
1129 |
docs = ["olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinx-rtd-theme (>=1.0)", "sphinxext-opengraph"]
|
1130 |
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
|
1131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1132 |
[[package]]
|
1133 |
name = "pluggy"
|
1134 |
version = "1.0.0"
|
@@ -1689,6 +1719,17 @@ python-versions = ">=3.6"
|
|
1689 |
[package.dependencies]
|
1690 |
streamlit = ">=0.63"
|
1691 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1692 |
[[package]]
|
1693 |
name = "terminado"
|
1694 |
version = "0.15.0"
|
@@ -1952,6 +1993,14 @@ category = "main"
|
|
1952 |
optional = false
|
1953 |
python-versions = ">=3.4"
|
1954 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1955 |
[[package]]
|
1956 |
name = "zipp"
|
1957 |
version = "3.8.0"
|
@@ -1967,7 +2016,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
|
|
1967 |
[metadata]
|
1968 |
lock-version = "1.1"
|
1969 |
python-versions = "^3.8.3"
|
1970 |
-
content-hash = "
|
1971 |
|
1972 |
[metadata.files]
|
1973 |
about-time = [
|
@@ -2259,6 +2308,9 @@ gitpython = [
|
|
2259 |
grapheme = [
|
2260 |
{file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
|
2261 |
]
|
|
|
|
|
|
|
2262 |
icecream = [
|
2263 |
{file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
|
2264 |
{file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
|
@@ -2489,6 +2541,10 @@ numpy = [
|
|
2489 |
{file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"},
|
2490 |
{file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"},
|
2491 |
]
|
|
|
|
|
|
|
|
|
2492 |
packaging = [
|
2493 |
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
2494 |
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
@@ -2576,6 +2632,10 @@ pillow = [
|
|
2576 |
{file = "Pillow-9.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baf3be0b9446a4083cc0c5bb9f9c964034be5374b5bc09757be89f5d2fa247b8"},
|
2577 |
{file = "Pillow-9.1.1.tar.gz", hash = "sha256:7502539939b53d7565f3d11d87c78e7ec900d3c72945d4ee0e2f250d598309a0"},
|
2578 |
]
|
|
|
|
|
|
|
|
|
2579 |
pluggy = [
|
2580 |
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
2581 |
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
@@ -3107,6 +3167,10 @@ streamlit-option-menu = [
|
|
3107 |
{file = "streamlit-option-menu-0.3.2.tar.gz", hash = "sha256:69d1aef6f30f83f29eda3dc9667733bc2e28cd640eb17b4b6ca315f633484c52"},
|
3108 |
{file = "streamlit_option_menu-0.3.2-py3-none-any.whl", hash = "sha256:0b7eae3ffdb0276c81d15750465c72957d57d2f766cb027c586d053519731178"},
|
3109 |
]
|
|
|
|
|
|
|
|
|
3110 |
terminado = [
|
3111 |
{file = "terminado-0.15.0-py3-none-any.whl", hash = "sha256:0d5f126fbfdb5887b25ae7d9d07b0d716b1cc0ccaacc71c1f3c14d228e065197"},
|
3112 |
{file = "terminado-0.15.0.tar.gz", hash = "sha256:ab4eeedccfcc1e6134bfee86106af90852c69d602884ea3a1e8ca6d4486e9bfe"},
|
@@ -3263,6 +3327,9 @@ xlsxwriter = [
|
|
3263 |
{file = "XlsxWriter-3.0.3-py3-none-any.whl", hash = "sha256:df0aefe5137478d206847eccf9f114715e42aaea077e6a48d0e8a2152e983010"},
|
3264 |
{file = "XlsxWriter-3.0.3.tar.gz", hash = "sha256:e89f4a1d2fa2c9ea15cde77de95cd3fd8b0345d0efb3964623f395c8c4988b7f"},
|
3265 |
]
|
|
|
|
|
|
|
3266 |
zipp = [
|
3267 |
{file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
|
3268 |
{file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
|
|
|
519 |
[package.extras]
|
520 |
test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
|
521 |
|
522 |
+
[[package]]
|
523 |
+
name = "hanzidentifier"
|
524 |
+
version = "1.0.2"
|
525 |
+
description = "Python module that identifies Chinese text as Simplified or Traditional."
|
526 |
+
category = "main"
|
527 |
+
optional = false
|
528 |
+
python-versions = "*"
|
529 |
+
|
530 |
+
[package.dependencies]
|
531 |
+
zhon = ">=1.1.3"
|
532 |
+
|
533 |
[[package]]
|
534 |
name = "icecream"
|
535 |
version = "2.1.2"
|
|
|
1049 |
optional = false
|
1050 |
python-versions = ">=3.8"
|
1051 |
|
1052 |
+
[[package]]
|
1053 |
+
name = "opencc-python-reimplemented"
|
1054 |
+
version = "0.1.6"
|
1055 |
+
description = "OpenCC made with Python"
|
1056 |
+
category = "main"
|
1057 |
+
optional = false
|
1058 |
+
python-versions = "*"
|
1059 |
+
|
1060 |
[[package]]
|
1061 |
name = "packaging"
|
1062 |
version = "21.3"
|
|
|
1148 |
docs = ["olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinx-rtd-theme (>=1.0)", "sphinxext-opengraph"]
|
1149 |
tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
|
1150 |
|
1151 |
+
[[package]]
|
1152 |
+
name = "plotly"
|
1153 |
+
version = "5.8.0"
|
1154 |
+
description = "An open-source, interactive data visualization library for Python"
|
1155 |
+
category = "main"
|
1156 |
+
optional = false
|
1157 |
+
python-versions = ">=3.6"
|
1158 |
+
|
1159 |
+
[package.dependencies]
|
1160 |
+
tenacity = ">=6.2.0"
|
1161 |
+
|
1162 |
[[package]]
|
1163 |
name = "pluggy"
|
1164 |
version = "1.0.0"
|
|
|
1719 |
[package.dependencies]
|
1720 |
streamlit = ">=0.63"
|
1721 |
|
1722 |
+
[[package]]
|
1723 |
+
name = "tenacity"
|
1724 |
+
version = "8.0.1"
|
1725 |
+
description = "Retry code until it succeeds"
|
1726 |
+
category = "main"
|
1727 |
+
optional = false
|
1728 |
+
python-versions = ">=3.6"
|
1729 |
+
|
1730 |
+
[package.extras]
|
1731 |
+
doc = ["reno", "sphinx", "tornado (>=4.5)"]
|
1732 |
+
|
1733 |
[[package]]
|
1734 |
name = "terminado"
|
1735 |
version = "0.15.0"
|
|
|
1993 |
optional = false
|
1994 |
python-versions = ">=3.4"
|
1995 |
|
1996 |
+
[[package]]
|
1997 |
+
name = "zhon"
|
1998 |
+
version = "1.1.5"
|
1999 |
+
description = "Zhon provides constants used in Chinese text processing."
|
2000 |
+
category = "main"
|
2001 |
+
optional = false
|
2002 |
+
python-versions = "*"
|
2003 |
+
|
2004 |
[[package]]
|
2005 |
name = "zipp"
|
2006 |
version = "3.8.0"
|
|
|
2016 |
[metadata]
|
2017 |
lock-version = "1.1"
|
2018 |
python-versions = "^3.8.3"
|
2019 |
+
content-hash = "eabc5b9c944b380d2a60b4ec1b1f218f4b1a3aea1426c7fb75fdc51d4889e57e"
|
2020 |
|
2021 |
[metadata.files]
|
2022 |
about-time = [
|
|
|
2308 |
grapheme = [
|
2309 |
{file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
|
2310 |
]
|
2311 |
+
hanzidentifier = [
|
2312 |
+
{file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
|
2313 |
+
]
|
2314 |
icecream = [
|
2315 |
{file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
|
2316 |
{file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
|
|
|
2541 |
{file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"},
|
2542 |
{file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"},
|
2543 |
]
|
2544 |
+
opencc-python-reimplemented = [
|
2545 |
+
{file = "opencc-python-reimplemented-0.1.6.tar.gz", hash = "sha256:6e4eaae2bd6b04d5c1b5bd7f3f87904ba2f1caa982203fdf2610c8261e47ee24"},
|
2546 |
+
{file = "opencc_python_reimplemented-0.1.6-py3.8.egg", hash = "sha256:3071d7ddcecc1b5129434e713e35f73aab9f5bd507d728c908acdbb48879194d"},
|
2547 |
+
]
|
2548 |
packaging = [
|
2549 |
{file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
|
2550 |
{file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
|
|
|
2632 |
{file = "Pillow-9.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baf3be0b9446a4083cc0c5bb9f9c964034be5374b5bc09757be89f5d2fa247b8"},
|
2633 |
{file = "Pillow-9.1.1.tar.gz", hash = "sha256:7502539939b53d7565f3d11d87c78e7ec900d3c72945d4ee0e2f250d598309a0"},
|
2634 |
]
|
2635 |
+
plotly = [
|
2636 |
+
{file = "plotly-5.8.0-py2.py3-none-any.whl", hash = "sha256:0e6e2382aafe2b2978d2c1b10ea93e73ad1ec80fa9a195ff6eea62af7905dfdc"},
|
2637 |
+
{file = "plotly-5.8.0.tar.gz", hash = "sha256:58cef3292f5994d82154d51fbc7338c48009fc47ea32ffe052ad29aaa15e0df9"},
|
2638 |
+
]
|
2639 |
pluggy = [
|
2640 |
{file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
|
2641 |
{file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
|
|
|
3167 |
{file = "streamlit-option-menu-0.3.2.tar.gz", hash = "sha256:69d1aef6f30f83f29eda3dc9667733bc2e28cd640eb17b4b6ca315f633484c52"},
|
3168 |
{file = "streamlit_option_menu-0.3.2-py3-none-any.whl", hash = "sha256:0b7eae3ffdb0276c81d15750465c72957d57d2f766cb027c586d053519731178"},
|
3169 |
]
|
3170 |
+
tenacity = [
|
3171 |
+
{file = "tenacity-8.0.1-py3-none-any.whl", hash = "sha256:f78f4ea81b0fabc06728c11dc2a8c01277bfc5181b321a4770471902e3eb844a"},
|
3172 |
+
{file = "tenacity-8.0.1.tar.gz", hash = "sha256:43242a20e3e73291a28bcbcacfd6e000b02d3857a9a9fff56b297a27afdc932f"},
|
3173 |
+
]
|
3174 |
terminado = [
|
3175 |
{file = "terminado-0.15.0-py3-none-any.whl", hash = "sha256:0d5f126fbfdb5887b25ae7d9d07b0d716b1cc0ccaacc71c1f3c14d228e065197"},
|
3176 |
{file = "terminado-0.15.0.tar.gz", hash = "sha256:ab4eeedccfcc1e6134bfee86106af90852c69d602884ea3a1e8ca6d4486e9bfe"},
|
|
|
3327 |
{file = "XlsxWriter-3.0.3-py3-none-any.whl", hash = "sha256:df0aefe5137478d206847eccf9f114715e42aaea077e6a48d0e8a2152e983010"},
|
3328 |
{file = "XlsxWriter-3.0.3.tar.gz", hash = "sha256:e89f4a1d2fa2c9ea15cde77de95cd3fd8b0345d0efb3964623f395c8c4988b7f"},
|
3329 |
]
|
3330 |
+
zhon = [
|
3331 |
+
{file = "zhon-1.1.5.tar.gz", hash = "sha256:793723575c46f10ace8846c579ce740b04c73e2aa583e04e000aedbd4a47f87f"},
|
3332 |
+
]
|
3333 |
zipp = [
|
3334 |
{file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
|
3335 |
{file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
|
pylintrc
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[EXCEPTIONS]
|
2 |
+
# C0301 line too long
|
3 |
+
# R0801 dup
|
4 |
+
# C0103 invalid-name
|
5 |
+
# W0612 unused-variable
|
6 |
+
# W0611 unused-import
|
7 |
+
disable=W0703,R0801,C0103,C0301,W0612,W0611
|
pyproject.toml
CHANGED
@@ -22,6 +22,9 @@ streamlit = "^1.9.2"
|
|
22 |
debee = "^0.1.0-alpha.2"
|
23 |
ezbee = "^0.1.0"
|
24 |
streamlit-option-menu = "^0.3.2"
|
|
|
|
|
|
|
25 |
|
26 |
[tool.poe.executor]
|
27 |
type = "poetry"
|
@@ -33,7 +36,9 @@ build = "poetry build"
|
|
33 |
_publish = "poetry publish"
|
34 |
release = ["test", "build", "_publish"]
|
35 |
lint = { cmd = "pylint litbee" }
|
36 |
-
|
|
|
|
|
37 |
docstyle = "pydocstyle --convention=google tests litbee"
|
38 |
tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
|
39 |
|
|
|
22 |
debee = "^0.1.0-alpha.2"
|
23 |
ezbee = "^0.1.0"
|
24 |
streamlit-option-menu = "^0.3.2"
|
25 |
+
plotly = "^5.8.0"
|
26 |
+
hanzidentifier = "^1.0.2"
|
27 |
+
opencc-python-reimplemented = "^0.1.6"
|
28 |
|
29 |
[tool.poe.executor]
|
30 |
type = "poetry"
|
|
|
36 |
_publish = "poetry publish"
|
37 |
release = ["test", "build", "_publish"]
|
38 |
lint = { cmd = "pylint litbee" }
|
39 |
+
isort = "isort tests litbee"
|
40 |
+
black = "black tests litbee"
|
41 |
+
format = ["isort", "black"]
|
42 |
docstyle = "pydocstyle --convention=google tests litbee"
|
43 |
tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
|
44 |
|
run-flake8.sh
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
flake8 --ignore F401,E501,F841
|
tests/test_t2s.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Test t2s."""
|
2 |
+
from litbee.t2s import t2s
|
3 |
+
|
4 |
+
|
5 |
+
def test_t2s1():
|
6 |
+
"""Test trivial t2s."""
|
7 |
+
assert t2s(["", ""]) == ["", ""]
|
8 |
+
assert t2s(["a\nb", ""]) == ["a\nb", ""]
|
9 |
+
assert t2s(["a\n\nb\n", ""]) == ["a\n\nb\n", ""]
|
10 |
+
|
11 |
+
|
12 |
+
def test_t2s2():
|
13 |
+
"""Test t2s."""
|
14 |
+
assert t2s(["需攜帶", "需攜帶"]) == ["需携带", "需携带"]
|
15 |
+
assert t2s(["需攜帶\n\n需攜帶\n", "需携带\n"]) == ["需携带\n\n需携带\n", "需携带\n"]
|