Sambhavnoobcoder committed on
Commit 4ce54e2 · 1 Parent(s): 72cc2a3

Russian code adaptation

Files changed (1)
  1. app.py +58 -33
app.py CHANGED
@@ -1,36 +1,61 @@
import gradio as gr
import tensorflow as tf
+ from keras_preprocessing.text import tokenizer_from_json
+ import numpy as np
+ import nltk
+ nltk.download('stopwords')
+ nltk.download('wordnet')
+ from nltk.corpus import stopwords
+ import re
+ from nltk.stem import WordNetLemmatizer
+ stemmer = WordNetLemmatizer()
+ from keras.preprocessing.text import Tokenizer
+ from keras import backend as K

- # Load the saved model
- model = tf.keras.models.load_model("sentimentality.h5")
-
- def preprocess(text):
-     # Tokenize the text into a list of words
-     words = text.strip().lower().split()
-     # Load the vocabulary
-     with open('vocabulary.txt', 'r') as f:
-         vocab = f.read().splitlines()
-     # Convert the words to indices in the vocabulary
-     word_indices = [vocab.index(word) if word in vocab else 0 for word in words]
-     # Pad the sequence with zeros to a fixed length of 500
-     padded_indices = np.zeros(500, dtype=np.int32)
-     padded_indices[:len(word_indices)] = word_indices
-     # Convert the sequence to a tensor
-     tensor = np.expand_dims(padded_indices, axis=0)
-     return tensor
-
- def predict_sentiment(text):
-     # preprocess input text
-     processed_text = preprocess(text)
-
-     # predict sentiment
-     prediction = model.predict([processed_text])[0][0]
-     sentiment = 'positive' if prediction >= 0.5 else 'negative'
-
-     return sentiment
-
- iface = gr.Interface(fn=predict_sentiment,
-                      inputs=gr.inputs.Textbox(label='Input Text'),
-                      outputs=gr.outputs.Label(label='Sentiment Prediction'))
-
- iface.launch()
+ # Loading our models
+
+ def recall(y_true, y_pred):
+     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+     possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
+     recall = true_positives / (possible_positives + K.epsilon())
+     return recall
+
+ def precision(y_true, y_pred):
+     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
+     predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
+     precision = true_positives / (predicted_positives + K.epsilon())
+     return precision
+
+ def f1(y_true, y_pred):
+     p = precision(y_true, y_pred)
+     r = recall(y_true, y_pred)
+     return 2 * ((p * r) / (p + r))
+
+ def accuracy(y_true, y_pred):
+     return K.mean(K.equal(y_true, K.round(y_pred)), axis=1)
+
+ with open('tokenizer.json', 'r', encoding='utf-8') as f:
+     tokenizer_config = f.read()
+
+ tokenizer = tokenizer_from_json(tokenizer_config)
+
+ model = tf.keras.models.load_model("sentimentality.h5", custom_objects={'f1':f1, 'recall': recall, 'precision': precision, 'accuracy':accuracy})
+
+ def get_sentiment(text):
+     global model
+     text = re.sub(r"@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+", ' ', str(text))
+     text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)
+     text = re.sub(r'\s+', ' ', text, flags=re.I)
+     text = text.lower()
+     text = text.split()
+     text = [stemmer.lemmatize(word) for word in text]
+     text = ' '.join(text)
+     text = tokenizer.texts_to_sequences([text])[0]
+     text += [0] * (200 - len(text))
+     text = np.array(text).reshape(-1, 200)
+     x = model.predict(text).tolist()[0][0]
+     return ('Positive' if x >= 0.5 else 'negative') + ' sentiment!'
+
+ interface = gr.Interface(fn = get_sentiment, inputs = 'text', outputs = 'text', title = 'Kotsko Kyrylo IPS-42 Sentiment140', description = 'Kotsko Kyrylo IPS-42 Sentiment140')
+
+ interface.launch(share=False)
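
For reference, a minimal sketch (not part of the commit) of how the new get_sentiment pipeline could be smoke-tested outside the Gradio UI. It assumes the definitions in app.py above have already run successfully, i.e. that sentimentality.h5 and tokenizer.json are present in the working directory; the sample sentences are purely illustrative.

# Hypothetical local check: calls get_sentiment directly, assuming the model
# and tokenizer loaded above are available in this module's scope.
if __name__ == "__main__":
    samples = [
        "I really enjoyed this movie, the acting was great",
        "this was a complete waste of time",
    ]
    for sentence in samples:
        # get_sentiment strips mentions/URLs, lemmatizes, tokenizes, pads the
        # sequence to length 200, and thresholds the model output at 0.5
        print(sentence, "->", get_sentiment(sentence))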