ppsingh committed on
Commit
8fd31d9
·
verified ·
1 Parent(s): 717d69f

Update appStore/adapmit.py

Browse files
Files changed (1) hide show
  1. appStore/adapmit.py +39 -172
appStore/adapmit.py CHANGED
@@ -1,172 +1,39 @@
1
- # set path
2
- import glob, os, sys
3
- sys.path.append('../utils')
4
-
5
- #import needed libraries
6
- import seaborn as sns
7
- import matplotlib.pyplot as plt
8
- import numpy as np
9
- import pandas as pd
10
- import streamlit as st
11
- from utils.adapmit_classifier import load_adapmitClassifier,adapmit_classification
12
- # from utils.keyword_extraction import textrank
13
- import logging
14
- logger = logging.getLogger(__name__)
15
- from utils.config import get_classifier_params
16
- from utils.preprocessing import paraLengthCheck
17
- from io import BytesIO
18
- import xlsxwriter
19
- import plotly.express as px
20
-
21
- # Declare all the necessary variables
22
- classifier_identifier = 'adapmit'
23
- params = get_classifier_params(classifier_identifier)
24
-
25
-
26
- def app():
27
- ### Main app code ###
28
- with st.container():
29
- if 'key1' in st.session_state:
30
- df = st.session_state.key1
31
-
32
- classifier = load_adapmitClassifier(classifier_name=params['model_name'])
33
- st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
34
- if sum(df['Target Label'] == 'TARGET') > 100:
35
- warning_msg = ": This might take sometime, please sit back and relax."
36
- else:
37
- warning_msg = ""
38
-
39
- df = adapmit_classification(haystack_doc=df,
40
- threshold= params['threshold'])
41
-
42
- st.session_state.key1 = df
43
-
44
- # @st.cache_data
45
- # def to_excel(df):
46
- # len_df = len(df)
47
- # output = BytesIO()
48
- # writer = pd.ExcelWriter(output, engine='xlsxwriter')
49
- # df.to_excel(writer, index=False, sheet_name='Sheet1')
50
- # workbook = writer.book
51
- # worksheet = writer.sheets['Sheet1']
52
- # worksheet.data_validation('E2:E{}'.format(len_df),
53
- # {'validate': 'list',
54
- # 'source': ['No', 'Yes', 'Discard']})
55
- # worksheet.data_validation('F2:F{}'.format(len_df),
56
- # {'validate': 'list',
57
- # 'source': ['No', 'Yes', 'Discard']})
58
- # worksheet.data_validation('G2:G{}'.format(len_df),
59
- # {'validate': 'list',
60
- # 'source': ['No', 'Yes', 'Discard']})
61
- # writer.save()
62
- # processed_data = output.getvalue()
63
- # return processed_data
64
-
65
-
66
-
67
-
68
- # threshold= params['threshold']
69
- # truth_df = df.drop(['text'],axis=1)
70
- # truth_df = truth_df.astype(float) >= threshold
71
- # truth_df = truth_df.astype(str)
72
- # categories = list(truth_df.columns)
73
-
74
- # placeholder = {}
75
- # for val in categories:
76
- # placeholder[val] = dict(truth_df[val].value_counts())
77
- # count_df = pd.DataFrame.from_dict(placeholder)
78
- # count_df = count_df.T
79
- # count_df = count_df.reset_index()
80
- # # st.write(count_df)
81
- # placeholder = []
82
- # for i in range(len(count_df)):
83
- # placeholder.append([count_df.iloc[i]['index'],count_df['True'][i],'Yes'])
84
- # placeholder.append([count_df.iloc[i]['index'],count_df['False'][i],'No'])
85
- # count_df = pd.DataFrame(placeholder, columns = ['category','count','truth_value'])
86
- # # st.write("Total Paragraphs: {}".format(len(df)))
87
- # fig = px.bar(count_df, y='category', x='count',
88
- # color='truth_value',orientation='h', height =200)
89
- # c1, c2 = st.columns([1,1])
90
- # with c1:
91
- # st.plotly_chart(fig,use_container_width= True)
92
-
93
- # truth_df['labels'] = truth_df.apply(lambda x: {i if x[i]=='True' else None for i in categories}, axis=1)
94
- # truth_df['labels'] = truth_df.apply(lambda x: list(x['labels'] -{None}),axis=1)
95
- # # st.write(truth_df)
96
- # df = pd.concat([df,truth_df['labels']],axis=1)
97
- # st.markdown("###### Top few 'Mitigation' related paragraph/text ######")
98
- # df = df.sort_values(by = ['Mitigation'], ascending=False)
99
- # for i in range(3):
100
- # if df.iloc[i]['Mitigation'] >= 0.50:
101
- # st.write('**Result {}** (Relevancy Score: {:.2f})'.format(i+1,df.iloc[i]['Mitigation']))
102
- # st.write("\t Text: \t{}".format(df.iloc[i]['text'].replace("\n", " ")))
103
-
104
- # st.markdown("###### Top few 'Adaptation' related paragraph/text ######")
105
- # df = df.sort_values(by = ['Adaptation'], ascending=False)
106
- # for i in range(3):
107
- # if df.iloc[i]['Adaptation'] > 0.5:
108
- # st.write('**Result {}** (Relevancy Score: {:.2f})'.format(i+1,df.iloc[i]['Adaptation']))
109
- # st.write("\t Text: \t{}".format(df.iloc[i]['text'].replace("\n", " ")))
110
- # # st.write(df[['text','labels']])
111
- # df['Validation'] = 'No'
112
- # df['Val-Mitigation'] = 'No'
113
- # df['Val-Adaptation'] = 'No'
114
- # df_xlsx = to_excel(df)
115
- # st.download_button(label='📥 Download Current Result',
116
- # data=df_xlsx ,
117
- # file_name= 'file_adaptation-mitigation.xlsx')
118
- # # st.session_state.key4 =
119
-
120
- # # category =set(df.columns)
121
- # # removecols = {'Validation','Val-Adaptation','Val-Mitigation','text'}
122
- # # category = list(category - removecols)
123
-
124
- # else:
125
- # st.info("🤔 No document found, please try to upload it at the sidebar!")
126
- # logging.warning("Terminated as no document provided")
127
-
128
- # # Creating truth value dataframe
129
- # if 'key4' in st.session_state:
130
- # if st.session_state.key4 is not None:
131
- # df = st.session_state.key4
132
- # st.markdown("###### Select the threshold for classifier ######")
133
- # c4, c5 = st.columns([1,1])
134
-
135
- # with c4:
136
- # threshold = st.slider("Threshold", min_value=0.00, max_value=1.0,
137
- # step=0.01, value=0.5,
138
- # help = "Keep High Value if want refined result, low if dont want to miss anything" )
139
- # category =set(df.columns)
140
- # removecols = {'Validation','Val-Adaptation','Val-Mitigation','text'}
141
- # category = list(category - removecols)
142
-
143
- # placeholder = {}
144
- # for val in category:
145
- # temp = df[val].astype(float) > threshold
146
- # temp = temp.astype(str)
147
- # placeholder[val] = dict(temp.value_counts())
148
-
149
- # count_df = pd.DataFrame.from_dict(placeholder)
150
- # count_df = count_df.T
151
- # count_df = count_df.reset_index()
152
- # placeholder = []
153
- # for i in range(len(count_df)):
154
- # placeholder.append([count_df.iloc[i]['index'],count_df['False'][i],'False'])
155
- # placeholder.append([count_df.iloc[i]['index'],count_df['True'][i],'True'])
156
-
157
- # count_df = pd.DataFrame(placeholder, columns = ['category','count','truth_value'])
158
- # fig = px.bar(count_df, x='category', y='count',
159
- # color='truth_value',
160
- # height=400)
161
- # st.write("")
162
- # st.plotly_chart(fig)
163
-
164
- # df['Validation'] = 'No'
165
- # df['Val-Mitigation'] = 'No'
166
- # df['Val-Adaptation'] = 'No'
167
- # df_xlsx = to_excel(df)
168
- # st.download_button(label='📥 Download Current Result',
169
- # data=df_xlsx ,
170
- # file_name= 'file_adaptation-mitigation.xlsx')
171
-
172
-
 
1
+ # set path
2
+ import glob, os, sys
3
+ sys.path.append('../utils')
4
+
5
+ #import needed libraries
6
+ import seaborn as sns
7
+ import matplotlib.pyplot as plt
8
+ import numpy as np
9
+ import pandas as pd
10
+ import streamlit as st
11
+ from utils.adapmit_classifier import load_adapmitClassifier,adapmit_classification
12
+ # from utils.keyword_extraction import textrank
13
+ import logging
14
+ logger = logging.getLogger(__name__)
15
+ from utils.config import get_classifier_params
16
+ from utils.preprocessing import paraLengthCheck
17
+ from io import BytesIO
18
+ import xlsxwriter
19
+ import plotly.express as px
20
+
21
+ # Declare all the necessary variables
22
+ classifier_identifier = 'adapmit'
23
+ params = get_classifier_params(classifier_identifier)
24
+
25
+
26
+ def app():
27
+ ### Main app code ###
28
+ with st.container():
29
+ if 'key1' in st.session_state:
30
+ df = st.session_state.key1
31
+
32
+ classifier = load_adapmitClassifier(classifier_name=params['model_name'])
33
+ st.session_state['{}_classifier'.format(classifier_identifier)] = classifier
34
+
35
+ df = adapmit_classification(haystack_doc=df,
36
+ threshold= params['threshold'])
37
+
38
+ st.session_state.key1 = df
39
+