Sambhavnoobcoder committed on
Commit 4ce54e2 · 1 Parent(s): 72cc2a3

Russian code adaptation

Files changed (1)
  1. app.py +58 -33
app.py CHANGED
@@ -1,36 +1,61 @@
import gradio as gr
import tensorflow as tf
+ from keras_preprocessing.text import tokenizer_from_json
+ import numpy as np
+ import nltk
+ nltk.download('stopwords')
+ nltk.download('wordnet')
+ from nltk.corpus import stopwords
+ import re
+ from nltk.stem import WordNetLemmatizer
+ stemmer = WordNetLemmatizer()
+ from keras.preprocessing.text import Tokenizer
+ from keras import backend as K

- # Load the saved model
- model = tf.keras.models.load_model("sentimentality.h5")
-
- def preprocess(text):
-     # Tokenize the text into a list of words
-     words = text.strip().lower().split()
-     # Load the vocabulary
-     with open('vocabulary.txt', 'r') as f:
-         vocab = f.read().splitlines()
-     # Convert the words to indices in the vocabulary
-     word_indices = [vocab.index(word) if word in vocab else 0 for word in words]
-     # Pad the sequence with zeros to a fixed length of 500
-     padded_indices = np.zeros(500, dtype=np.int32)
-     padded_indices[:len(word_indices)] = word_indices
-     # Convert the sequence to a tensor
-     tensor = np.expand_dims(padded_indices, axis=0)
-     return tensor
-
- def predict_sentiment(text):
-     # preprocess input text
-     processed_text = preprocess(text)
-
-     # predict sentiment
-     prediction = model.predict([processed_text])[0][0]
-     sentiment = 'positive' if prediction >= 0.5 else 'negative'
-
-     return sentiment
-
- iface = gr.Interface(fn=predict_sentiment,
-                      inputs=gr.inputs.Textbox(label='Input Text'),
-                      outputs=gr.outputs.Label(label='Sentiment Prediction'))
-
- iface.launch()
+ # Loading our models
+
+ def recall(y_true, y_pred):
+     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+     possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
+     recall = true_positives / (possible_positives + K.epsilon())
+     return recall
+
+ def precision(y_true, y_pred):
+     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+     predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
+     precision = true_positives / (predicted_positives + K.epsilon())
+     return precision
+
+ def f1(y_true, y_pred):
+     p = precision(y_true, y_pred)
+     r = recall(y_true, y_pred)
+     return 2 * ((p * r) / (p + r))
+
+ def accuracy(y_true, y_pred):
+     return K.mean(K.equal(y_true, K.round(y_pred)), axis=1)
+
+ with open('tokenizer.json', 'r', encoding='utf-8') as f:
+     tokenizer_config = f.read()
+
+ tokenizer = tokenizer_from_json(tokenizer_config)
+
+ model = tf.keras.models.load_model("sentimentality.h5", custom_objects={'f1':f1, 'recall': recall, 'precision': precision, 'accuracy':accuracy})
+
+ def get_sentiment(text):
+     global model
+     text = re.sub(r"@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+", ' ', str(text))
+     text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)
+     text = re.sub(r'\s+', ' ', text, flags=re.I)
+     text = text.lower()
+     text = text.split()
+     text = [stemmer.lemmatize(word) for word in text]
+     text = ' '.join(text)
+     text = tokenizer.texts_to_sequences([text])[0]
+     text += [0] * (200 - len(text))
+     text = np.array(text).reshape(-1, 200)
+     x = model.predict(text).tolist()[0][0]
+     return ('Positive' if x >= 0.5 else 'negative') + ' sentiment!'
+
+ interface = gr.Interface(fn = get_sentiment, inputs = 'text', outputs = 'text', title = 'Kotsko Kyrylo IPS-42 Sentiment140', description = 'Kotsko Kyrylo IPS-42 Sentiment140')
+
+ interface.launch(share=False)
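
For reference, a minimal sketch (not part of the commit) of how the new get_sentiment pipeline could be smoke-tested outside the Gradio UI. It assumes the definitions in app.py above have already run successfully, i.e. that sentimentality.h5 and tokenizer.json are present in the working directory; the sample sentences are purely illustrative.

# Hypothetical local check: calls get_sentiment directly, assuming the model
# and tokenizer loaded above are available in this module's scope.
if __name__ == "__main__":
    samples = [
        "I really enjoyed this movie, the acting was great",
        "this was a complete waste of time",
    ]
    for sentence in samples:
        # get_sentiment strips mentions/URLs, lemmatizes, tokenizes, pads the
        # sequence to length 200, and thresholds the model output at 0.5
        print(sentence, "->", get_sentiment(sentence))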