freemt committed on
Commit
b665d70
·
1 Parent(s): 78bc728

Update hanzidentifier opencc to handle traditional chinese

Browse files
app-.py DELETED
@@ -1,140 +0,0 @@
1
- """Prep __main__.py.
2
-
3
- https://share.streamlit.io/deploy
4
- Advanced settings...
5
- Python version
6
- 3.7
7
- 3.8
8
- 3.9*
9
- 3.10
10
-
11
- https://docs.streamlit.io/knowledge-base/using-streamlit/hide-row-indices-displaying-dataframe
12
- Hide row indices when displaying a dataframe
13
- # CSS to inject contained in a string
14
- hide_table_row_index = '''
15
- <style>
16
- tbody th {display:none}
17
- .blank {display:none}
18
- </style>
19
- '''
20
- # Inject CSS with Markdown
21
- st.markdown(hide_table_row_index, unsafe_allow_html=True)
22
-
23
- # Display a static table
24
- st.table(df)
25
-
26
- # Hide row indices with st.dataframe
27
- # CSS to inject contained in a string
28
- hide_dataframe_row_index = '''
29
- <style>
30
- .row_heading.level0 {display:none}
31
- .blank {display:none}
32
- </style>
33
- '''
34
- # Inject CSS with Markdown
35
- st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)
36
-
37
- # Display an interactive table
38
- st.dataframe(df)
39
-
40
- https://medium.com/@avra42/streamlit-python-cool-tricks-to-make-your-web-application-look-better-8abfc3763a5b
41
- hide_menu_style = '''
42
- <style>
43
- #MainMenu {visibility: hidden; }
44
- footer {visibility: hidden;}
45
- </style>
46
- '''
47
- st.markdown(hide_menu_style, unsafe_allow_html=True)
48
-
49
- """
50
- # pylint: disable=invalid-name
51
- import os
52
- import sys
53
- import time
54
- from pathlib import Path
55
- from types import SimpleNamespace
56
- from typing import Optional
57
-
58
- import loguru
59
- import logzero
60
- import pandas as pd
61
- import streamlit as st
62
- from loguru import logger as loggu
63
- from logzero import logger
64
- from set_loglevel import set_loglevel
65
- from streamlit import session_state as state
66
-
67
- from litbee import __version__, litbee
68
- from litbee.options import options
69
-
70
- # from litbee.files2df import files2df
71
- # from litbee.utils import sb_front_cover, instructions, menu_items
72
- # from litbee.ezbee_page import ezbee_page
73
- # from litbee.dzbee_page import dzbee_page
74
- # from litbee.xbee_page import xbee_page
75
- from litbee.utils import menu_items
76
-
77
- # from ezbee import ezbee
78
-
79
- curr_py = sys.version[:3]
80
- msg = f"Some packages litbee depends on can only run with Python 3.8, current python is {curr_py}, sorry..."
81
- assert curr_py == "3.8", msg
82
-
83
- os.environ["TZ"] = "Asia/Shanghai"
84
- time.tzset()
85
- os.environ["LOGLEVEL"] = "10" # uncomment this in dev
86
- logzero.loglevel(set_loglevel())
87
-
88
- loggu.remove()
89
- _ = (
90
- "<green>{time:YY-MM-DD HH:mm:ss}</green> | "
91
- "<level>{level: <5}</level> | <level>{message}</level> "
92
- "<cyan>{name}</cyan>:<cyan>{line}</cyan>"
93
- )
94
- loggu.add(
95
- sys.stderr,
96
- format=_,
97
- level=set_loglevel(),
98
- colorize=True,
99
- )
100
-
101
- # from PIL import Image
102
- # page_icon=Image.open("icon.ico"),
103
- st.set_page_config(
104
- page_title=f"litbee v{__version__}",
105
- # page_icon="🧊",
106
- page_icon="🐝",
107
- # layout="wide",
108
- initial_sidebar_state="auto", # "auto" or "expanded" or "collapsed",
109
- menu_items=menu_items,
110
- )
111
-
112
- # pd.set_option("precision", 2)
113
- pd.set_option("display.precision", 2)
114
- pd.options.display.float_format = "{:,.2f}".format
115
-
116
- _ = dict(
117
- beetype="ezbee",
118
- src_filename="",
119
- tgt_filename="",
120
- src_fileio=b"",
121
- tgt_fileio=b"",
122
- src_file="",
123
- tgt_file="",
124
- list1=[""],
125
- list2=[""],
126
- df=None,
127
- df_a=None,
128
- df_s_a=None,
129
- )
130
- if "ns" not in state:
131
- state.ns = SimpleNamespace(**_)
132
- state.ns.list = [*_]
133
-
134
-
135
- def main():
136
- """Bootstrap."""
137
- options()
138
-
139
-
140
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -135,6 +135,7 @@ _ = dict(
135
  beetype="ezbee",
136
  sourcetype="upload",
137
  sourcecount=2,
 
138
  src_filename="",
139
  tgt_filename="",
140
  src_fileio=b"",
 
135
  beetype="ezbee",
136
  sourcetype="upload",
137
  sourcecount=2,
138
+ sent_ali=None,
139
  src_filename="",
140
  tgt_filename="",
141
  src_fileio=b"",
litbee/__main__.py CHANGED
@@ -1,5 +1,7 @@
1
  """Run streamlit run app.py from __main__.py."""
 
2
  import sys
 
3
  from streamlit import cli
4
 
5
  sys.argv = ["streamlit", "run", "app.py"]
 
1
  """Run streamlit run app.py from __main__.py."""
2
+ # pylint: disable=no-value-for-parameter
3
  import sys
4
+
5
  from streamlit import cli
6
 
7
  sys.argv = ["streamlit", "run", "app.py"]
litbee/app-.py DELETED
@@ -1,140 +0,0 @@
1
- """Prep __main__.py.
2
-
3
- https://share.streamlit.io/deploy
4
- Advanced settings...
5
- Python version
6
- 3.7
7
- 3.8
8
- 3.9*
9
- 3.10
10
-
11
- https://docs.streamlit.io/knowledge-base/using-streamlit/hide-row-indices-displaying-dataframe
12
- Hide row indices when displaying a dataframe
13
- # CSS to inject contained in a string
14
- hide_table_row_index = '''
15
- <style>
16
- tbody th {display:none}
17
- .blank {display:none}
18
- </style>
19
- '''
20
- # Inject CSS with Markdown
21
- st.markdown(hide_table_row_index, unsafe_allow_html=True)
22
-
23
- # Display a static table
24
- st.table(df)
25
-
26
- # Hide row indices with st.dataframe
27
- # CSS to inject contained in a string
28
- hide_dataframe_row_index = '''
29
- <style>
30
- .row_heading.level0 {display:none}
31
- .blank {display:none}
32
- </style>
33
- '''
34
- # Inject CSS with Markdown
35
- st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)
36
-
37
- # Display an interactive table
38
- st.dataframe(df)
39
-
40
- https://medium.com/@avra42/streamlit-python-cool-tricks-to-make-your-web-application-look-better-8abfc3763a5b
41
- hide_menu_style = '''
42
- <style>
43
- #MainMenu {visibility: hidden; }
44
- footer {visibility: hidden;}
45
- </style>
46
- '''
47
- st.markdown(hide_menu_style, unsafe_allow_html=True)
48
-
49
- """
50
- # pylint: disable=invalid-name
51
- import os
52
- import sys
53
- import time
54
- from pathlib import Path
55
- from types import SimpleNamespace
56
- from typing import Optional
57
-
58
- import loguru
59
- import logzero
60
- import pandas as pd
61
- import streamlit as st
62
- from loguru import logger as loggu
63
- from logzero import logger
64
- from set_loglevel import set_loglevel
65
- from streamlit import session_state as state
66
-
67
- from litbee import __version__, litbee
68
- from litbee.options import options
69
-
70
- # from litbee.files2df import files2df
71
- # from litbee.utils import sb_front_cover, instructions, menu_items
72
- # from litbee.ezbee_page import ezbee_page
73
- # from litbee.dzbee_page import dzbee_page
74
- # from litbee.xbee_page import xbee_page
75
- from litbee.utils import menu_items
76
-
77
- # from ezbee import ezbee
78
-
79
- curr_py = sys.version[:3]
80
- msg = f"Some packages litbee depends on can only run with Python 3.8, current python is {curr_py}, sorry..."
81
- assert curr_py == "3.8", msg
82
-
83
- os.environ["TZ"] = "Asia/Shanghai"
84
- time.tzset()
85
- os.environ["LOGLEVEL"] = "10" # uncomment this in dev
86
- logzero.loglevel(set_loglevel())
87
-
88
- loggu.remove()
89
- _ = (
90
- "<green>{time:YY-MM-DD HH:mm:ss}</green> | "
91
- "<level>{level: <5}</level> | <level>{message}</level> "
92
- "<cyan>{name}</cyan>:<cyan>{line}</cyan>"
93
- )
94
- loggu.add(
95
- sys.stderr,
96
- format=_,
97
- level=set_loglevel(),
98
- colorize=True,
99
- )
100
-
101
- # from PIL import Image
102
- # page_icon=Image.open("icon.ico"),
103
- st.set_page_config( # type: ignore
104
- page_title=f"litbee v{__version__}",
105
- # page_icon="🧊",
106
- page_icon="🐝",
107
- # layout="wide",
108
- initial_sidebar_state="auto", # "auto" or "expanded" or "collapsed",
109
- menu_items=menu_items,
110
- )
111
-
112
- # pd.set_option("precision", 2)
113
- pd.set_option("display.precision", 2)
114
- pd.options.display.float_format = "{:,.2f}".format
115
-
116
- _ = dict(
117
- beetype="ezbee",
118
- src_filename="",
119
- tgt_filename="",
120
- src_fileio=b"",
121
- tgt_fileio=b"",
122
- src_file="",
123
- tgt_file="",
124
- list1=[""],
125
- list2=[""],
126
- df=None,
127
- df_a=None,
128
- df_s_a=None,
129
- )
130
- if "ns" not in state:
131
- state.ns = SimpleNamespace(**_)
132
- state.ns.list = [*_]
133
-
134
-
135
- def main():
136
- """Bootstrap."""
137
- options()
138
-
139
-
140
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
litbee/app.py CHANGED
@@ -55,13 +55,12 @@ from pathlib import Path
55
  from types import SimpleNamespace
56
  from typing import Optional
57
 
 
 
 
58
  import loguru
59
  import logzero
60
  import pandas as pd
61
- import ezbee
62
- import dzbee
63
- import debee
64
-
65
  import streamlit as st
66
  from loguru import logger as loggu
67
  from logzero import logger
@@ -69,25 +68,25 @@ from set_loglevel import set_loglevel
69
  from streamlit import session_state as state
70
 
71
  from litbee import __version__
72
- # from litbee.options import options
 
 
 
73
 
74
  # from litbee.files2df import files2df
75
  # from litbee.utils import sb_front_cover, instructions, menu_items
76
  # from litbee.ezbee_page import ezbee_page
77
  # from litbee.dzbee_page import dzbee_page
78
  # from litbee.xbee_page import xbee_page
79
- from litbee.utils import menu_items
 
 
80
 
81
- from litbee.multipage import Multipage
82
 
83
  # from litbee.fetch_upload import fetch_upload
84
  # from litbee.fetch_paste import fetch_paste
85
  # from litbee.fetch_urls import fetch_urls
86
 
87
- from litbee.home import home
88
- from litbee.settings import settings
89
- from litbee.info import info
90
- from litbee.utils import style_css
91
 
92
  # from ezbee import ezbee
93
 
 
55
  from types import SimpleNamespace
56
  from typing import Optional
57
 
58
+ import debee
59
+ import dzbee
60
+ import ezbee
61
  import loguru
62
  import logzero
63
  import pandas as pd
 
 
 
 
64
  import streamlit as st
65
  from loguru import logger as loggu
66
  from logzero import logger
 
68
  from streamlit import session_state as state
69
 
70
  from litbee import __version__
71
+ from litbee.home import home
72
+ from litbee.info import info
73
+ from litbee.multipage import Multipage
74
+ from litbee.settings import settings
75
 
76
  # from litbee.files2df import files2df
77
  # from litbee.utils import sb_front_cover, instructions, menu_items
78
  # from litbee.ezbee_page import ezbee_page
79
  # from litbee.dzbee_page import dzbee_page
80
  # from litbee.xbee_page import xbee_page
81
+ from litbee.utils import menu_items, style_css
82
+
83
+ # from litbee.options import options
84
 
 
85
 
86
  # from litbee.fetch_upload import fetch_upload
87
  # from litbee.fetch_paste import fetch_paste
88
  # from litbee.fetch_urls import fetch_urls
89
 
 
 
 
 
90
 
91
  # from ezbee import ezbee
92
 
litbee/dzbee_page.py DELETED
@@ -1,20 +0,0 @@
1
- """Display dzbee page."""
2
- import pandas as pd
3
- import streamlit as st
4
- from loguru import logger as loggu
5
- from logzero import logger
6
-
7
-
8
- def dzbee_page():
9
- """Display dzbee page."""
10
- # st.title('dzbee')
11
- # st.write('Welcome to app1')
12
-
13
- try:
14
- df = st.session_state.ns.df
15
- except Exception as exc:
16
- logger.error(exc)
17
- df = pd.DataFrame([[""]])
18
-
19
- loggu.debug(" df ")
20
- st.table(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
litbee/ezbee_page.py DELETED
@@ -1,129 +0,0 @@
1
- """Display ezbee page."""
2
- from functools import partial
3
-
4
- import logzero
5
- import pandas as pd
6
- import streamlit as st
7
- from ezbee import ezbee
8
- from ezbee.gen_pairs import gen_pairs
9
- from loguru import logger as loggu
10
- from logzero import logger
11
- from set_loglevel import set_loglevel
12
- from st_aggrid import AgGrid
13
- from st_aggrid.grid_options_builder import GridOptionsBuilder
14
- from streamlit import session_state as state
15
-
16
- logzero.loglevel(set_loglevel())
17
-
18
-
19
- def st_radio_horizontal(*args, **kwargs):
20
- """Trick to have horizontal st radio to simulate tabs."""
21
- col, _ = st.columns(2)
22
- with col:
23
- # st.write('<style> div[data-testid=column] > div > div > div > div.stRadio > div{flex-direction: row;}</style>', unsafe_allow_html=True)
24
- # return st.radio(*args, **kwargs)
25
- st.write(
26
- "<style> div[data-testid=stSidebar] > div > div > div > div > div > div > div.stRadio > div{flex-direction: row;}</style>",
27
- unsafe_allow_html=True,
28
- )
29
- return st.sidebar.radio(*args, **kwargs)
30
-
31
-
32
- def ezbee_page():
33
- """Display ezbee page."""
34
- # st.title('ezbee')
35
- # st.write('### ezbee')
36
- # st.write('Welcome to app1')
37
-
38
- _ = """
39
- try:
40
- df = st.session_state.ns.df
41
- except Exception as exc:
42
- logger.error(exc)
43
- df = pd.DataFrame([[""]])
44
- # """
45
-
46
- # st.table(df) # looks alright
47
-
48
- # stlyed pd dataframe?
49
- # bigger, no pagination
50
- # st.markdown(df.to_html(), unsafe_allow_html=True)
51
-
52
- # ag_grid smallish, editable, probably slower
53
-
54
- if "df" not in globals():
55
- logger.debug(" df not defined, return")
56
- return None
57
-
58
- df = pd.DataFrame([["", "", ""]], columns=["text1", "text2", "llh"])
59
-
60
- df_exp = st.expander("to be aligned", expanded=False)
61
- with df_exp:
62
- st.write(df) # too small
63
-
64
- _ = """
65
- ag_exp = st.expander("done aligned") # , expanded=False
66
- with ag_exp:
67
- agdf = AgGrid(
68
- df,
69
- # fit_columns_on_grid_load=True,
70
- editable=True,
71
- gridOptions=gridOptions,
72
- key="ag_exp",
73
- )
74
- # """
75
-
76
- list1 = [elm.strip() for elm in df.text1 if elm.strip()]
77
- list2 = [elm.strip() for elm in df.text2 if elm.strip()]
78
- logger.info("Processing data...")
79
- try:
80
- aset = ezbee(
81
- list1,
82
- list2,
83
- # eps=eps,
84
- # min_samples=min_samples,
85
- )
86
- except Exception as e:
87
- logger.error("aset = ezbee(...) exc: %s", e)
88
- aset = ""
89
- return None
90
-
91
- # fastlid changed logger.level is changed to 20
92
- # turn back to loglevel
93
- logzero.loglevel(set_loglevel())
94
- if aset:
95
- logger.debug("aset: %s...%s", aset[:3], aset[-3:])
96
-
97
- # st.write(aset)
98
-
99
- aligned_pairs = gen_pairs(list1, list2, aset)
100
- if aligned_pairs:
101
- logger.debug("%s...%s", aligned_pairs[:3], aligned_pairs[-3:])
102
-
103
- df_a = pd.DataFrame(aligned_pairs, columns=["text1", "text2", "llh"])
104
-
105
- # insert seq no
106
- df_a.insert(0, "sn", range(len(df_a)))
107
-
108
- gb = GridOptionsBuilder.from_dataframe(df_a)
109
- gb.configure_pagination(paginationAutoPageSize=True)
110
- options = {
111
- "resizable": True,
112
- "autoHeight": True,
113
- "wrapText": True,
114
- "editable": True,
115
- }
116
- gb.configure_default_column(**options)
117
- gridOptions = gb.build()
118
-
119
- st.write("aligned (double-click a cell to edit)")
120
- agdf = AgGrid(
121
- # df,
122
- df_a,
123
- gridOptions=gridOptions,
124
- key="outside",
125
- editable=True,
126
- width="100%",
127
- height=500,
128
- # fit_columns_on_grid_load=True,
129
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
litbee/fetch_upload.py CHANGED
@@ -3,31 +3,33 @@
3
  org ezbee_page.py.
4
  """
5
  # pylint: disable=invalid-name
6
- from functools import partial
7
  import inspect
 
 
8
  from itertools import zip_longest
9
  from time import perf_counter
10
 
 
11
  import logzero
12
  import numpy as np
13
  import pandas as pd
14
  import streamlit as st
15
- from dzbee import dzbee # noqa
16
- from ezbee import ezbee # noqa
17
- from debee import debee # noqa
18
 
19
- # from ezbee.gen_pairs import gen_pairs # aset2pairs?
20
  from aset2pairs import aset2pairs
 
 
 
21
  from fastlid import fastlid
22
  from icecream import ic
23
  from loguru import logger as loggu
24
  from logzero import logger
25
  from set_loglevel import set_loglevel
26
- from st_aggrid import AgGrid, GridUpdateMode, GridOptionsBuilder
27
- # from st_aggrid.grid_options_builder import GridOptionsBuilder
28
  from streamlit import session_state as state
29
 
30
- # logzero.loglevel(set_loglevel())
31
 
32
 
33
  def fetch_upload(): # noqa
@@ -43,7 +45,7 @@ def fetch_upload(): # noqa
43
  return None
44
 
45
  # src_fileio tgt_fileio
46
- with st.form(key='upload_in_form'):
47
  _ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
48
  with _:
49
  col1, col2 = st.columns(2)
@@ -67,7 +69,7 @@ def fetch_upload(): # noqa
67
  key="tgt_text",
68
  # accept_multiple_files=True,
69
  )
70
- submitted = st.form_submit_button('Submit')
71
 
72
  # logger.debug(" len(src_fileio): %s", len(src_fileio))
73
  # logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
@@ -127,10 +129,12 @@ def fetch_upload(): # noqa
127
  if not (filename1 or filename2):
128
  st.write("| no file uploaded")
129
  return None
130
- elif not filename1:
 
131
  st.write("| file1 not ready")
132
  return None
133
- elif not filename2:
 
134
  st.write("| file2 not ready")
135
  return None
136
 
@@ -216,16 +220,18 @@ def fetch_upload(): # noqa
216
  # logger.debug("fn.__doc__: %s", fn.__doc__)
217
  logger.debug("fn.__name__: %s", fn.__name__)
218
 
219
- from inspect import getabsfile
220
- logger.debug("getabsfile(fn): %s", getabsfile(fn))
 
 
221
 
222
  with st.spinner(" diggin..."):
223
  then = perf_counter()
224
  try:
225
  # aset = ezbee/dzbee/debee
226
  aset = globals()[state.ns.beetype](
227
- list1,
228
- list2,
229
  # eps=eps,
230
  # min_samples=min_samples,
231
  )
@@ -267,7 +273,9 @@ def fetch_upload(): # noqa
267
  logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
268
  # logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
269
 
270
- df_a = pd.DataFrame(aligned_pairs, columns=["text1", "text2", "llh"], dtype="object")
 
 
271
 
272
  # if set_loglevel() <= 10:
273
  _ = st.expander("done aligned")
@@ -301,5 +309,7 @@ def fetch_upload(): # noqa
301
  # width="100%", # width parameter is deprecated
302
  height=750,
303
  # fit_columns_on_grid_load=True,
304
- update_mode=GridUpdateMode.MODEL_CHANGED
305
  )
 
 
 
3
  org ezbee_page.py.
4
  """
5
  # pylint: disable=invalid-name
6
+ # pylint: disable=too-many-return-statements,too-many-branches,too-many-statements, too-many-locals
7
  import inspect
8
+
9
+ from functools import partial
10
  from itertools import zip_longest
11
  from time import perf_counter
12
 
13
+ import hanzidentifier
14
  import logzero
15
  import numpy as np
16
  import pandas as pd
17
  import streamlit as st
 
 
 
18
 
 
19
  from aset2pairs import aset2pairs
20
+ from debee import debee # noqa
21
+ from dzbee import dzbee # noqa
22
+ from ezbee import ezbee # noqa
23
  from fastlid import fastlid
24
  from icecream import ic
25
  from loguru import logger as loggu
26
  from logzero import logger
27
  from set_loglevel import set_loglevel
28
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
29
+
30
  from streamlit import session_state as state
31
 
32
+ from litbee.t2s import t2s
33
 
34
 
35
  def fetch_upload(): # noqa
 
45
  return None
46
 
47
  # src_fileio tgt_fileio
48
+ with st.form(key="upload_in_form"):
49
  _ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
50
  with _:
51
  col1, col2 = st.columns(2)
 
69
  key="tgt_text",
70
  # accept_multiple_files=True,
71
  )
72
+ submitted = st.form_submit_button("Submit")
73
 
74
  # logger.debug(" len(src_fileio): %s", len(src_fileio))
75
  # logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
 
129
  if not (filename1 or filename2):
130
  st.write("| no file uploaded")
131
  return None
132
+
133
+ if not filename1:
134
  st.write("| file1 not ready")
135
  return None
136
+
137
+ if not filename2:
138
  st.write("| file2 not ready")
139
  return None
140
 
 
220
  # logger.debug("fn.__doc__: %s", fn.__doc__)
221
  logger.debug("fn.__name__: %s", fn.__name__)
222
 
223
+ # from inspect import getabsfile
224
+ # logger.debug("getabsfile(fn): %s", getabsfile(fn))
225
+
226
+ # convert to simplified chinese if is_traditional
227
 
228
  with st.spinner(" diggin..."):
229
  then = perf_counter()
230
  try:
231
  # aset = ezbee/dzbee/debee
232
  aset = globals()[state.ns.beetype](
233
+ t2s(list1), # t2s, handle trand.chinese
234
+ t2s(list2),
235
  # eps=eps,
236
  # min_samples=min_samples,
237
  )
 
273
  logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
274
  # logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
275
 
276
+ df_a = pd.DataFrame(
277
+ aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
278
+ )
279
 
280
  # if set_loglevel() <= 10:
281
  _ = st.expander("done aligned")
 
309
  # width="100%", # width parameter is deprecated
310
  height=750,
311
  # fit_columns_on_grid_load=True,
312
+ update_mode=GridUpdateMode.MODEL_CHANGED,
313
  )
314
+
315
+ return None
litbee/home.py CHANGED
@@ -2,9 +2,10 @@
2
 
3
  org ezbee_page.py.
4
  """
 
 
5
  # pylint: disable=invalid-name
6
  from functools import partial
7
- import inspect
8
  from itertools import zip_longest
9
  from time import perf_counter
10
 
@@ -12,18 +13,19 @@ import logzero
12
  import numpy as np
13
  import pandas as pd
14
  import streamlit as st
15
- from dzbee import dzbee # noqa
16
- from ezbee import ezbee # noqa
17
- from debee import debee # noqa
18
 
19
  # from ezbee.gen_pairs import gen_pairs # aset2pairs?
20
  from aset2pairs import aset2pairs
 
 
 
21
  from fastlid import fastlid
22
  from icecream import ic
23
  from loguru import logger as loggu
24
  from logzero import logger
25
  from set_loglevel import set_loglevel
26
- from st_aggrid import AgGrid, GridUpdateMode, GridOptionsBuilder
 
27
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
28
  from streamlit import session_state as state
29
 
@@ -43,7 +45,7 @@ def home(): # noqa
43
  return None
44
 
45
  # src_fileio tgt_fileio
46
- with st.form(key='upload_in_form'):
47
  _ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
48
  with _:
49
  col1, col2 = st.columns(2)
@@ -67,7 +69,7 @@ def home(): # noqa
67
  key="tgt_text",
68
  # accept_multiple_files=True,
69
  )
70
- submitted = st.form_submit_button('Submit')
71
 
72
  # logger.debug(" len(src_fileio): %s", len(src_fileio))
73
  # logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
@@ -217,6 +219,7 @@ def home(): # noqa
217
  logger.debug("fn.__name__: %s", fn.__name__)
218
 
219
  from inspect import getabsfile
 
220
  logger.debug("getabsfile(fn): %s", getabsfile(fn))
221
 
222
  with st.spinner(" diggin..."):
@@ -267,7 +270,9 @@ def home(): # noqa
267
  logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
268
  # logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
269
 
270
- df_a = pd.DataFrame(aligned_pairs, columns=["text1", "text2", "llh"], dtype="object")
 
 
271
 
272
  # if set_loglevel() <= 10:
273
  _ = st.expander("done aligned")
@@ -301,5 +306,5 @@ def home(): # noqa
301
  # width="100%", # width parameter is deprecated
302
  height=750,
303
  # fit_columns_on_grid_load=True,
304
- update_mode=GridUpdateMode.MODEL_CHANGED
305
  )
 
2
 
3
  org ezbee_page.py.
4
  """
5
+ import inspect
6
+
7
  # pylint: disable=invalid-name
8
  from functools import partial
 
9
  from itertools import zip_longest
10
  from time import perf_counter
11
 
 
13
  import numpy as np
14
  import pandas as pd
15
  import streamlit as st
 
 
 
16
 
17
  # from ezbee.gen_pairs import gen_pairs # aset2pairs?
18
  from aset2pairs import aset2pairs
19
+ from debee import debee # noqa
20
+ from dzbee import dzbee # noqa
21
+ from ezbee import ezbee # noqa
22
  from fastlid import fastlid
23
  from icecream import ic
24
  from loguru import logger as loggu
25
  from logzero import logger
26
  from set_loglevel import set_loglevel
27
+ from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode
28
+
29
  # from st_aggrid.grid_options_builder import GridOptionsBuilder
30
  from streamlit import session_state as state
31
 
 
45
  return None
46
 
47
  # src_fileio tgt_fileio
48
+ with st.form(key="upload_in_form"):
49
  _ = st.expander(f"{state.ns.beetype}: Pick two files", expanded=True)
50
  with _:
51
  col1, col2 = st.columns(2)
 
69
  key="tgt_text",
70
  # accept_multiple_files=True,
71
  )
72
+ submitted = st.form_submit_button("Submit")
73
 
74
  # logger.debug(" len(src_fileio): %s", len(src_fileio))
75
  # logger.debug(" len(tgt_fileio): %s", len(tgt_fileio))
 
219
  logger.debug("fn.__name__: %s", fn.__name__)
220
 
221
  from inspect import getabsfile
222
+
223
  logger.debug("getabsfile(fn): %s", getabsfile(fn))
224
 
225
  with st.spinner(" diggin..."):
 
270
  logger.debug("%s...%s", aligned_pairs[:1], aligned_pairs[-1:])
271
  # logger.debug("aligned_pairs[:20]: \n%s", aligned_pairs[:20])
272
 
273
+ df_a = pd.DataFrame(
274
+ aligned_pairs, columns=["text1", "text2", "llh"], dtype="object"
275
+ )
276
 
277
  # if set_loglevel() <= 10:
278
  _ = st.expander("done aligned")
 
306
  # width="100%", # width parameter is deprecated
307
  height=750,
308
  # fit_columns_on_grid_load=True,
309
+ update_mode=GridUpdateMode.MODEL_CHANGED,
310
  )
litbee/info.py CHANGED
@@ -1,10 +1,11 @@
1
  """Present info about litbee."""
2
- import ezbee
3
- import dzbee
4
- import debee
5
-
6
  from textwrap import dedent
 
 
 
 
7
  import streamlit as st
 
8
  from litbee import __version__
9
 
10
  # from litbee.utils import style_css
@@ -23,7 +24,9 @@ msg = dedent(
23
  <li> xbee/bumblebee: other language pairs, normal para-align
24
  </ul>
25
 
26
- The algorithm for fast para-align is home-brewn. Two sent-align algorithms are used: one based on Gale-Church, the other machine learning.
 
 
27
  </div>
28
  """
29
  ).strip()
 
1
  """Present info about litbee."""
 
 
 
 
2
  from textwrap import dedent
3
+
4
+ import debee
5
+ import dzbee
6
+ import ezbee
7
  import streamlit as st
8
+
9
  from litbee import __version__
10
 
11
  # from litbee.utils import style_css
 
24
  <li> xbee/bumblebee: other language pairs, normal para-align
25
  </ul>
26
 
27
+ The algorithm for fast para-align is home-brewn. Two
28
+ sent-align algorithms are used: one based on Gale-Church,
29
+ the other machine learning.
30
  </div>
31
  """
32
  ).strip()
litbee/multipage.py CHANGED
@@ -13,29 +13,24 @@ class Multipage:
13
  """Framework for combining multiple streamlit applications."""
14
 
15
  def __init__(self) -> None:
16
- """Constructor class to generate a list which will store all our applications as an instance variable."""
17
  self.pages = []
18
 
19
  def add_page(self, title, icon, func) -> None:
20
- """Class Method to Add pages to the project
21
 
22
  Args:
23
  title ([str]): The title of page which we are adding to the list of apps
24
-
25
  func: Python function to render this page in Streamlit
26
  """
27
-
28
- self.pages.append(
29
- {
30
- "title": title,
31
- "icon": icon,
32
- "function": func
33
- }
34
- )
35
 
36
  def run(self):
37
- # Drodown to select the page to run
38
- st.markdown("""
 
 
39
  <style>
40
  section[data-testid="stSidebar"] > div:first-of-type {
41
  background-color: var(--secondary-background-color);
@@ -51,12 +46,18 @@ class Multipage:
51
  padding: 1rem 0;
52
  }
53
  </style>
54
- """, unsafe_allow_html=True)
 
 
55
 
56
  with st.sidebar:
57
- selected = option_menu(None, [page["title"] for page in self.pages],
58
- icons=[page["icon"] for page in self.pages],
59
- menu_icon="cast", default_index=0)
 
 
 
 
60
 
61
  # Run the selected page
62
  for index, item in enumerate(self.pages):
 
13
  """Framework for combining multiple streamlit applications."""
14
 
15
  def __init__(self) -> None:
16
+ """Construct class to generate a list which will store all our applications as an instance variable."""
17
  self.pages = []
18
 
19
  def add_page(self, title, icon, func) -> None:
20
+ """Class Method to Add pages to the project.
21
 
22
  Args:
23
  title ([str]): The title of page which we are adding to the list of apps
24
+ icon: icon from streamlit-menu-option
25
  func: Python function to render this page in Streamlit
26
  """
27
+ self.pages.append({"title": title, "icon": icon, "function": func})
 
 
 
 
 
 
 
28
 
29
  def run(self):
30
+ """Dropdown to select the page to run."""
31
+ # Dropdown to select the page to run
32
+ st.markdown(
33
+ """
34
  <style>
35
  section[data-testid="stSidebar"] > div:first-of-type {
36
  background-color: var(--secondary-background-color);
 
46
  padding: 1rem 0;
47
  }
48
  </style>
49
+ """,
50
+ unsafe_allow_html=True,
51
+ )
52
 
53
  with st.sidebar:
54
+ selected = option_menu(
55
+ None,
56
+ [page["title"] for page in self.pages],
57
+ icons=[page["icon"] for page in self.pages],
58
+ menu_icon="cast",
59
+ default_index=0,
60
+ )
61
 
62
  # Run the selected page
63
  for index, item in enumerate(self.pages):
litbee/options.py CHANGED
@@ -7,12 +7,13 @@ from loguru import logger as loggu
7
  from logzero import logger
8
  from streamlit import session_state as state
9
 
 
 
10
  # from litbee.ezbee_page import ezbee_page
11
  # from litbee.dzbee_page import dzbee_page
12
  # from litbee.xbee_page import xbee_page
13
  from litbee.fetch_upload import fetch_upload
14
  from litbee.fetch_urls import fetch_urls
15
- from litbee.fetch_paste import fetch_paste
16
  from litbee.files2df import files2df
17
  from litbee.utils import instructions, sb_front_cover
18
 
@@ -49,7 +50,7 @@ def options():
49
  # if beetype not in ["ezbee", "dzbee"]:
50
  if beetype not in ["ezbee", "dzbee", "debee"]:
51
  st.write("Coming soon")
52
- return None
53
 
54
  # multi-page setup
55
  menu = {
 
7
  from logzero import logger
8
  from streamlit import session_state as state
9
 
10
+ from litbee.fetch_paste import fetch_paste
11
+
12
  # from litbee.ezbee_page import ezbee_page
13
  # from litbee.dzbee_page import dzbee_page
14
  # from litbee.xbee_page import xbee_page
15
  from litbee.fetch_upload import fetch_upload
16
  from litbee.fetch_urls import fetch_urls
 
17
  from litbee.files2df import files2df
18
  from litbee.utils import instructions, sb_front_cover
19
 
 
50
  # if beetype not in ["ezbee", "dzbee"]:
51
  if beetype not in ["ezbee", "dzbee", "debee"]:
52
  st.write("Coming soon")
53
+ return
54
 
55
  # multi-page setup
56
  menu = {
litbee/settings.py CHANGED
@@ -1,6 +1,7 @@
1
  """Prep Settings/Options page."""
2
  # pylint: disable=invalid-name
3
  from functools import partial
 
4
  import streamlit as st
5
  from loguru import logger as loggu
6
  from logzero import logger
@@ -10,7 +11,8 @@ from streamlit import session_state as state
10
  def settings():
11
  """Prep Settings/Options page.
12
 
13
- Refer to options.py"""
 
14
  # horizotal radio
15
  st.write(
16
  "<style>div.row-widget.stRadio > div{flex-direction:row;}</style>",
@@ -24,6 +26,7 @@ def settings():
24
  # col1, col2 = st.columns(2)
25
 
26
  # with col1:
 
27
  try:
28
  index = beetype_list.index(state.ns.beetype)
29
  except Exception as e:
@@ -34,7 +37,7 @@ def settings():
34
  beetype_list,
35
  index=index,
36
  format_func=lambda x: f"{x:<7} |",
37
- help="ezbee: english-chinese; dzbee: german-chinese, debee: german-english",
38
  )
39
  state.ns.beetype = beetype
40
 
@@ -70,11 +73,27 @@ def settings():
70
  )
71
  state.ns.sourcecount = sourcecount
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  # show state.ns[:6]
74
  loggu.debug(f" state.ns.list: {state.ns.list}")
75
 
76
  # beetype, sourcetype, sourcecount, filename1, filename2
77
- _ = map(partial(getattr, state.ns), state.ns.list[:5])
78
  logger.debug(" state.ns.list[:3]: %s", str([*_]))
79
 
80
  # st.write(f"run: {state.ns.count}")
 
1
  """Prep Settings/Options page."""
2
  # pylint: disable=invalid-name
3
  from functools import partial
4
+
5
  import streamlit as st
6
  from loguru import logger as loggu
7
  from logzero import logger
 
11
  def settings():
12
  """Prep Settings/Options page.
13
 
14
+ Refer to options.py
15
+ """
16
  # horizontal radio
17
  st.write(
18
  "<style>div.row-widget.stRadio > div{flex-direction:row;}</style>",
 
26
  # col1, col2 = st.columns(2)
27
 
28
  # with col1:
29
+ _ = "ezbee: english-chinese; dzbee: german-chinese, debee: german-english; xbee: other language pairs (slow, approx.1000 pairs/3 min) | ezbee: 英/中; dzbee: 德/中, debee: 德/英; xbee: 其他语言对(慢, 约1000对/3分钟)"
30
  try:
31
  index = beetype_list.index(state.ns.beetype)
32
  except Exception as e:
 
37
  beetype_list,
38
  index=index,
39
  format_func=lambda x: f"{x:<7} |",
40
+ help=_,
41
  )
42
  state.ns.beetype = beetype
43
 
 
73
  )
74
  state.ns.sourcecount = sourcecount
75
 
76
+ sentali_list = [None, "fast", "slow"]
77
+ try:
78
+ index = sentali_list.index(state.ns.sentali)
79
+ except Exception as e:
80
+ logger.error("sentali index error: %s, setting to 0", e)
81
+ index = 0
82
+ sentali = st.radio(
83
+ "Sent Align",
84
+ sentali_list,
85
+ index=index,
86
+ format_func=lambda x: f"{str(x):<4} |",
87
+ help="None: no sent align; fast: gale-church; slow: machine-learning",
88
+ disabled=True,
89
+ )
90
+ state.ns.sentali = sentali
91
+
92
  # show state.ns[:6]
93
  loggu.debug(f" state.ns.list: {state.ns.list}")
94
 
95
  # beetype, sourcetype, sourcecount, filename1, filename2
96
+ _ = map(partial(getattr, state.ns), state.ns.list[:6])
97
  logger.debug(" state.ns.list[:3]: %s", str([*_]))
98
 
99
  # st.write(f"run: {state.ns.count}")
litbee/t2s.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Convert list to simplified Chinese for traditional Chinese, do nothing otherwise."""
2
+ # pylint: disable=invalid-name
3
+ from typing import List
4
+ import hanzidentifier
5
+ from logzero import logger
6
+ from opencc import OpenCC
7
+
8
+ convert = OpenCC('t2s').convert
9
+
10
+
11
+ def t2s(lst: List[str]) -> List[str]:
12
+ """Convert list to simplified Chinese for traditional Chinese, do nothing otherwise.
13
+
14
+ Args:
15
+ list of strings
16
+
17
+ Returns:
18
+ list of strings
19
+ """
20
+ try:
21
+ # lst[:1000] trim down for extremely large docs
22
+ _ = hanzidentifier.identify(" ".join(lst[:1000]))
23
+ except Exception as e:
24
+ logger.warning("hanzidentifier.is_traditional error: %s, settin to simplified.", e)
25
+ _ = hanzidentifier.SIMP # 2: simplified
26
+
27
+ if _ not in [hanzidentifier.TRAD, hanzidentifier.MIXED]:
28
+ return lst
29
+
30
+ res = []
31
+ for line in lst:
32
+ try:
33
+ _ = convert(line)
34
+ except Exception as e:
35
+ logger.warning("ts2 error: %s, setting to original", e)
36
+ _ = line
37
+ res.append(_)
38
+
39
+ return res
litbee/utils.py CHANGED
@@ -40,7 +40,7 @@ msg = dedent(
40
 
41
 
42
  def sb_front_cover():
43
- """Prep front cover for sidebar"""
44
  st.sidebar.markdown(f"### litbee {__version__} ")
45
 
46
  sb_tit_expander = st.sidebar.expander("More info (click to toggle)", expanded=False)
@@ -65,6 +65,7 @@ intructins = dedent(
65
 
66
 
67
  def instructions():
 
68
  logger.debug("instructions entry")
69
  back_cover_expander = st.expander("Instructions")
70
  with back_cover_expander:
 
40
 
41
 
42
  def sb_front_cover():
43
+ """Prep front cover for sidebar."""
44
  st.sidebar.markdown(f"### litbee {__version__} ")
45
 
46
  sb_tit_expander = st.sidebar.expander("More info (click to toggle)", expanded=False)
 
65
 
66
 
67
  def instructions():
68
+ """Prep msg."""
69
  logger.debug("instructions entry")
70
  back_cover_expander = st.expander("Instructions")
71
  with back_cover_expander:
poetry.lock CHANGED
@@ -519,6 +519,17 @@ python-versions = "*"
519
  [package.extras]
520
  test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
521
 
 
 
 
 
 
 
 
 
 
 
 
522
  [[package]]
523
  name = "icecream"
524
  version = "2.1.2"
@@ -1038,6 +1049,14 @@ category = "main"
1038
  optional = false
1039
  python-versions = ">=3.8"
1040
 
 
 
 
 
 
 
 
 
1041
  [[package]]
1042
  name = "packaging"
1043
  version = "21.3"
@@ -1129,6 +1148,17 @@ python-versions = ">=3.7"
1129
  docs = ["olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinx-rtd-theme (>=1.0)", "sphinxext-opengraph"]
1130
  tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
1131
 
 
 
 
 
 
 
 
 
 
 
 
1132
  [[package]]
1133
  name = "pluggy"
1134
  version = "1.0.0"
@@ -1689,6 +1719,17 @@ python-versions = ">=3.6"
1689
  [package.dependencies]
1690
  streamlit = ">=0.63"
1691
 
 
 
 
 
 
 
 
 
 
 
 
1692
  [[package]]
1693
  name = "terminado"
1694
  version = "0.15.0"
@@ -1952,6 +1993,14 @@ category = "main"
1952
  optional = false
1953
  python-versions = ">=3.4"
1954
 
 
 
 
 
 
 
 
 
1955
  [[package]]
1956
  name = "zipp"
1957
  version = "3.8.0"
@@ -1967,7 +2016,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-
1967
  [metadata]
1968
  lock-version = "1.1"
1969
  python-versions = "^3.8.3"
1970
- content-hash = "0001273edc2530f75a45f1baa8b689876cba18a12373a5f50776c6d758fbaee2"
1971
 
1972
  [metadata.files]
1973
  about-time = [
@@ -2259,6 +2308,9 @@ gitpython = [
2259
  grapheme = [
2260
  {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
2261
  ]
 
 
 
2262
  icecream = [
2263
  {file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
2264
  {file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
@@ -2489,6 +2541,10 @@ numpy = [
2489
  {file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"},
2490
  {file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"},
2491
  ]
 
 
 
 
2492
  packaging = [
2493
  {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
2494
  {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
@@ -2576,6 +2632,10 @@ pillow = [
2576
  {file = "Pillow-9.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baf3be0b9446a4083cc0c5bb9f9c964034be5374b5bc09757be89f5d2fa247b8"},
2577
  {file = "Pillow-9.1.1.tar.gz", hash = "sha256:7502539939b53d7565f3d11d87c78e7ec900d3c72945d4ee0e2f250d598309a0"},
2578
  ]
 
 
 
 
2579
  pluggy = [
2580
  {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
2581
  {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
@@ -3107,6 +3167,10 @@ streamlit-option-menu = [
3107
  {file = "streamlit-option-menu-0.3.2.tar.gz", hash = "sha256:69d1aef6f30f83f29eda3dc9667733bc2e28cd640eb17b4b6ca315f633484c52"},
3108
  {file = "streamlit_option_menu-0.3.2-py3-none-any.whl", hash = "sha256:0b7eae3ffdb0276c81d15750465c72957d57d2f766cb027c586d053519731178"},
3109
  ]
 
 
 
 
3110
  terminado = [
3111
  {file = "terminado-0.15.0-py3-none-any.whl", hash = "sha256:0d5f126fbfdb5887b25ae7d9d07b0d716b1cc0ccaacc71c1f3c14d228e065197"},
3112
  {file = "terminado-0.15.0.tar.gz", hash = "sha256:ab4eeedccfcc1e6134bfee86106af90852c69d602884ea3a1e8ca6d4486e9bfe"},
@@ -3263,6 +3327,9 @@ xlsxwriter = [
3263
  {file = "XlsxWriter-3.0.3-py3-none-any.whl", hash = "sha256:df0aefe5137478d206847eccf9f114715e42aaea077e6a48d0e8a2152e983010"},
3264
  {file = "XlsxWriter-3.0.3.tar.gz", hash = "sha256:e89f4a1d2fa2c9ea15cde77de95cd3fd8b0345d0efb3964623f395c8c4988b7f"},
3265
  ]
 
 
 
3266
  zipp = [
3267
  {file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
3268
  {file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
 
519
  [package.extras]
520
  test = ["pytest", "sphinx", "sphinx-autobuild", "wheel", "twine"]
521
 
522
+ [[package]]
523
+ name = "hanzidentifier"
524
+ version = "1.0.2"
525
+ description = "Python module that identifies Chinese text as Simplified or Traditional."
526
+ category = "main"
527
+ optional = false
528
+ python-versions = "*"
529
+
530
+ [package.dependencies]
531
+ zhon = ">=1.1.3"
532
+
533
  [[package]]
534
  name = "icecream"
535
  version = "2.1.2"
 
1049
  optional = false
1050
  python-versions = ">=3.8"
1051
 
1052
+ [[package]]
1053
+ name = "opencc-python-reimplemented"
1054
+ version = "0.1.6"
1055
+ description = "OpenCC made with Python"
1056
+ category = "main"
1057
+ optional = false
1058
+ python-versions = "*"
1059
+
1060
  [[package]]
1061
  name = "packaging"
1062
  version = "21.3"
 
1148
  docs = ["olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinx-rtd-theme (>=1.0)", "sphinxext-opengraph"]
1149
  tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
1150
 
1151
+ [[package]]
1152
+ name = "plotly"
1153
+ version = "5.8.0"
1154
+ description = "An open-source, interactive data visualization library for Python"
1155
+ category = "main"
1156
+ optional = false
1157
+ python-versions = ">=3.6"
1158
+
1159
+ [package.dependencies]
1160
+ tenacity = ">=6.2.0"
1161
+
1162
  [[package]]
1163
  name = "pluggy"
1164
  version = "1.0.0"
 
1719
  [package.dependencies]
1720
  streamlit = ">=0.63"
1721
 
1722
+ [[package]]
1723
+ name = "tenacity"
1724
+ version = "8.0.1"
1725
+ description = "Retry code until it succeeds"
1726
+ category = "main"
1727
+ optional = false
1728
+ python-versions = ">=3.6"
1729
+
1730
+ [package.extras]
1731
+ doc = ["reno", "sphinx", "tornado (>=4.5)"]
1732
+
1733
  [[package]]
1734
  name = "terminado"
1735
  version = "0.15.0"
 
1993
  optional = false
1994
  python-versions = ">=3.4"
1995
 
1996
+ [[package]]
1997
+ name = "zhon"
1998
+ version = "1.1.5"
1999
+ description = "Zhon provides constants used in Chinese text processing."
2000
+ category = "main"
2001
+ optional = false
2002
+ python-versions = "*"
2003
+
2004
  [[package]]
2005
  name = "zipp"
2006
  version = "3.8.0"
 
2016
  [metadata]
2017
  lock-version = "1.1"
2018
  python-versions = "^3.8.3"
2019
+ content-hash = "eabc5b9c944b380d2a60b4ec1b1f218f4b1a3aea1426c7fb75fdc51d4889e57e"
2020
 
2021
  [metadata.files]
2022
  about-time = [
 
2308
  grapheme = [
2309
  {file = "grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca"},
2310
  ]
2311
+ hanzidentifier = [
2312
+ {file = "hanzidentifier-1.0.2.tar.gz", hash = "sha256:793a298430aa9a9d6ab344dc0ca0ab4bd1161d88c7da941d6554571093003cba"},
2313
+ ]
2314
  icecream = [
2315
  {file = "icecream-2.1.2-py2.py3-none-any.whl", hash = "sha256:04b9cea4d9931cf6960db0430ebf11fa34464ce7152e384ddf50f96d25b201b4"},
2316
  {file = "icecream-2.1.2.tar.gz", hash = "sha256:09300b2d1c678712410cbd47c95198eb1b580f66f311a554ccd6b9e758ece0ee"},
 
2541
  {file = "numpy-1.22.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0791fbd1e43bf74b3502133207e378901272f3c156c4df4954cad833b1380207"},
2542
  {file = "numpy-1.22.4.zip", hash = "sha256:425b390e4619f58d8526b3dcf656dde069133ae5c240229821f01b5f44ea07af"},
2543
  ]
2544
+ opencc-python-reimplemented = [
2545
+ {file = "opencc-python-reimplemented-0.1.6.tar.gz", hash = "sha256:6e4eaae2bd6b04d5c1b5bd7f3f87904ba2f1caa982203fdf2610c8261e47ee24"},
2546
+ {file = "opencc_python_reimplemented-0.1.6-py3.8.egg", hash = "sha256:3071d7ddcecc1b5129434e713e35f73aab9f5bd507d728c908acdbb48879194d"},
2547
+ ]
2548
  packaging = [
2549
  {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"},
2550
  {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"},
 
2632
  {file = "Pillow-9.1.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:baf3be0b9446a4083cc0c5bb9f9c964034be5374b5bc09757be89f5d2fa247b8"},
2633
  {file = "Pillow-9.1.1.tar.gz", hash = "sha256:7502539939b53d7565f3d11d87c78e7ec900d3c72945d4ee0e2f250d598309a0"},
2634
  ]
2635
+ plotly = [
2636
+ {file = "plotly-5.8.0-py2.py3-none-any.whl", hash = "sha256:0e6e2382aafe2b2978d2c1b10ea93e73ad1ec80fa9a195ff6eea62af7905dfdc"},
2637
+ {file = "plotly-5.8.0.tar.gz", hash = "sha256:58cef3292f5994d82154d51fbc7338c48009fc47ea32ffe052ad29aaa15e0df9"},
2638
+ ]
2639
  pluggy = [
2640
  {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
2641
  {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
 
3167
  {file = "streamlit-option-menu-0.3.2.tar.gz", hash = "sha256:69d1aef6f30f83f29eda3dc9667733bc2e28cd640eb17b4b6ca315f633484c52"},
3168
  {file = "streamlit_option_menu-0.3.2-py3-none-any.whl", hash = "sha256:0b7eae3ffdb0276c81d15750465c72957d57d2f766cb027c586d053519731178"},
3169
  ]
3170
+ tenacity = [
3171
+ {file = "tenacity-8.0.1-py3-none-any.whl", hash = "sha256:f78f4ea81b0fabc06728c11dc2a8c01277bfc5181b321a4770471902e3eb844a"},
3172
+ {file = "tenacity-8.0.1.tar.gz", hash = "sha256:43242a20e3e73291a28bcbcacfd6e000b02d3857a9a9fff56b297a27afdc932f"},
3173
+ ]
3174
  terminado = [
3175
  {file = "terminado-0.15.0-py3-none-any.whl", hash = "sha256:0d5f126fbfdb5887b25ae7d9d07b0d716b1cc0ccaacc71c1f3c14d228e065197"},
3176
  {file = "terminado-0.15.0.tar.gz", hash = "sha256:ab4eeedccfcc1e6134bfee86106af90852c69d602884ea3a1e8ca6d4486e9bfe"},
 
3327
  {file = "XlsxWriter-3.0.3-py3-none-any.whl", hash = "sha256:df0aefe5137478d206847eccf9f114715e42aaea077e6a48d0e8a2152e983010"},
3328
  {file = "XlsxWriter-3.0.3.tar.gz", hash = "sha256:e89f4a1d2fa2c9ea15cde77de95cd3fd8b0345d0efb3964623f395c8c4988b7f"},
3329
  ]
3330
+ zhon = [
3331
+ {file = "zhon-1.1.5.tar.gz", hash = "sha256:793723575c46f10ace8846c579ce740b04c73e2aa583e04e000aedbd4a47f87f"},
3332
+ ]
3333
  zipp = [
3334
  {file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"},
3335
  {file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"},
pylintrc ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [EXCEPTIONS]
2
+ # C0301 line too long
3
+ # R0801 dup
4
+ # C0103 invalid-name
5
+ # W0612 unused-variable
6
+ # W0611 unused-import
7
+ disable=W0703,R0801,C0103,C0301,W0612,W0611
pyproject.toml CHANGED
@@ -22,6 +22,9 @@ streamlit = "^1.9.2"
22
  debee = "^0.1.0-alpha.2"
23
  ezbee = "^0.1.0"
24
  streamlit-option-menu = "^0.3.2"
 
 
 
25
 
26
  [tool.poe.executor]
27
  type = "poetry"
@@ -33,7 +36,9 @@ build = "poetry build"
33
  _publish = "poetry publish"
34
  release = ["test", "build", "_publish"]
35
  lint = { cmd = "pylint litbee" }
36
- format = "black tests litbee"
 
 
37
  docstyle = "pydocstyle --convention=google tests litbee"
38
  tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
39
 
 
22
  debee = "^0.1.0-alpha.2"
23
  ezbee = "^0.1.0"
24
  streamlit-option-menu = "^0.3.2"
25
+ plotly = "^5.8.0"
26
+ hanzidentifier = "^1.0.2"
27
+ opencc-python-reimplemented = "^0.1.6"
28
 
29
  [tool.poe.executor]
30
  type = "poetry"
 
36
  _publish = "poetry publish"
37
  release = ["test", "build", "_publish"]
38
  lint = { cmd = "pylint litbee" }
39
+ isort = "isort tests litbee"
40
+ black = "black tests litbee"
41
+ format = ["isort", "black"]
42
  docstyle = "pydocstyle --convention=google tests litbee"
43
  tunnel = {cmd = "ssh -CN ip_or_hostname_defined_in_hosts -L 9091:127.0.0.1:9091"}
44
 
run-flake8.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ flake8 --ignore F401,E501,F841
tests/test_t2s.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test t2s."""
2
+ from litbee.t2s import t2s
3
+
4
+
5
+ def test_t2s1():
6
+ """Test trivial t2s."""
7
+ assert t2s(["", ""]) == ["", ""]
8
+ assert t2s(["a\nb", ""]) == ["a\nb", ""]
9
+ assert t2s(["a\n\nb\n", ""]) == ["a\n\nb\n", ""]
10
+
11
+
12
+ def test_t2s2():
13
+ """Test t2s."""
14
+ assert t2s(["需攜帶", "需攜帶"]) == ["需携带", "需携带"]
15
+ assert t2s(["需攜帶\n\n需攜帶\n", "需携带\n"]) == ["需携带\n\n需携带\n", "需携带\n"]