Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # # Text Based Sentiment Analysis | |
| # # IMPORTING NECESSARY MODULES | |
| # In[1]: | |
| import numpy as np # For linear algebra | |
| import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv) | |
| import matplotlib.pyplot as plt # For Visualisation | |
| # get_ipython().run_line_magic('matplotlib', 'inline') | |
| import seaborn as sns # For Visualisation | |
| from bs4 import BeautifulSoup # For Text Parsing | |
| # # IMPORTING DATASET | |
| # In[2]: | |
# Load the raw review table and immediately discard every row that contains
# a missing value in any column.
data = pd.read_csv('Reviews.csv').dropna()

# Distinct rating values that remain after the clean-up.
score_unique = data['Score'].unique()
| # In[7]: | |
# Three-class sentiment label derived from the star rating:
#   0 -> negative review (Score < 3)
#   1 -> neutral review  (Score == 3)
#   2 -> positive review (Score > 3)
a = []
for score in data['Score']:
    if score > 3:
        a.append(2)
    elif score == 3:
        a.append(1)
    elif score < 3:
        a.append(0)
    # A value for which none of the comparisons holds (e.g. NaN) adds nothing.
| # In[8]: | |
# Tally the labels: r_0 counts the 0s, r_1 counts the 1s, and r_2 receives
# every remaining entry.
r_0 = a.count(0)
r_1 = a.count(1)
r_2 = len(a) - r_0 - r_1
| # print('Negative Reviews:',r_0) | |
| # print('Neutral Reviews:',r_1) | |
| # print('Positive Reviews:',r_2) | |
| # In[9]: | |
| # sns.countplot(a) | |
| # plt.xlabel('Reviews', color = 'red') | |
| # plt.ylabel('Count', color = 'red') | |
| # plt.xticks([0,1,2],['Negative','Neutral','Positive']) | |
| # plt.title('COUNT PLOT', color = 'r') | |
| # plt.show() | |
| # In[10]: | |
# Attach the computed labels and keep only the review text with its label.
data['sentiment'] = a
final_dataset = data.loc[:, ['Text', 'sentiment']]

# Split into positive (label 2) and negative (label 0) reviews; neutral
# reviews (label 1) end up in neither frame.
data_p = final_dataset.loc[data['sentiment'] == 2]
data_n = final_dataset.loc[data['sentiment'] == 0]
| #len(data_p), len(data_n) | |
| # In[12]: | |
# Draw 5000 positive and 5000 negative reviews at random. Sampling is with
# replacement, so the same row may be picked more than once.
#
# Row positions are drawn from the actual frame lengths instead of hard-coded
# row counts: a fixed upper bound makes ``iloc`` raise IndexError as soon as
# the filtered frames are shorter than expected, and a lower bound of 1 could
# never select the first row. ``randint`` excludes its upper bound, so
# ``len(frame)`` yields positions 0 .. len(frame) - 1.
datap = data_p.iloc[np.random.randint(0, len(data_p), 5000), :]
datan = data_n.iloc[np.random.randint(0, len(data_n), 5000), :]

# Stack both samples into a single class-balanced frame (positives first).
data = pd.concat([datap, datan])
| # In[14]: | |
# Collapse the labels to binary: 0 stays 0 (negative), 2 becomes 1 (positive).
# Entries holding any other value contribute nothing to the list.
c = [int(label == 2) for label in data['sentiment'] if label == 0 or label == 2]
data['sentiment'] = c
| # In[15]: | |
| # sns.countplot(data['sentiment']) | |
| # plt.show() | |
| # In[16]: | |
def strip_html(text):
    """Return the plain text of ``text`` with HTML markup removed.

    The input is parsed by BeautifulSoup using the "html.parser" parser and
    the text content of the parsed document is returned.
    """
    return BeautifulSoup(text, "html.parser").get_text()


# Store the markup-free text in a new 'review' column, then drop the raw
# 'Text' column it was derived from.
data['review'] = data['Text'].apply(strip_html)
data = data.drop(columns='Text')
| #data.head() | |
| # # MODEL BUILDING | |
| # In[17]: | |
| import nltk #Natural Language Processing Toolkit | |
import string

# Translation table that deletes every character in string.punctuation.
# Built once here so each call is a single C-level pass over the text.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def punc_clean(text):
    """Return ``text`` with all ASCII punctuation characters removed.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` without any character from ``string.punctuation``;
        every other character (including whitespace) is kept in order.
    """
    return text.translate(_PUNCTUATION_TABLE)
# Strip punctuation from every review, replacing the column in place.
data['review'] = [punc_clean(review_text) for review_text in data['review']]
| #data.head(2) | |
| # In[18]: | |
# Cached stopword set; filled on the first call to _english_stopwords().
_STOPWORD_SET = None


def _english_stopwords():
    """Return NLTK's English stopwords, minus 'not', as a cached frozenset."""
    global _STOPWORD_SET
    if _STOPWORD_SET is None:
        words = set(nltk.corpus.stopwords.words('english'))
        # 'not' is deliberately kept in the text, so it must not be filtered.
        words.discard('not')
        _STOPWORD_SET = frozenset(words)
    return _STOPWORD_SET


def remove_stopword(text):
    """Return ``text`` with English stopwords removed.

    The text is tokenized with ``nltk.word_tokenize``; tokens found in NLTK's
    English stopword list are dropped, except 'not', which is kept. The
    remaining tokens are joined with single spaces.

    The stopword list is loaded once and cached as a set, instead of being
    reloaded and scanned as a list on every call.

    Args:
        text: The string to filter.

    Returns:
        The space-joined tokens that are not stopwords.
    """
    stopword = _english_stopwords()
    return ' '.join(w for w in nltk.word_tokenize(text) if w not in stopword)
#data['review'] = data['review'].apply(remove_stopword)
| # In[19]: | |
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Learn a TF-IDF vocabulary over unigrams and bigrams from the cleaned
# reviews and vectorise those same reviews in one step.
vectr = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
vect_X = vectr.fit_transform(data['review'])

# Fit a logistic-regression classifier on the TF-IDF features against the
# binary sentiment labels; `clf` is the fitted estimator returned by fit().
model = LogisticRegression()
clf = model.fit(vect_X, data['sentiment'])
| #clf.score(vect_X,data['sentiment'])*100 | |
| # # PREDICTION | |
| # In[21]: | |
# Run the trained classifier on three hand-written sample reviews. Each review
# is vectorised on its own and passed to the model; the returned predictions
# are not stored.
_sample_reviews = (
    '''Nice look and build quality with moderately fast everything such as refresh rate, display quality, sound, processing, gaming experience and many more ..
I didn't find any lagging or heating issue..And battery health I won't say great but I'll take that
Only cons I can say about it is camera.. sharpening picture a little much at day light and low light photo you have to compromise.''',
    '''Phone has bugs , and screen quality is poor , Avoid realme. Gaming was just over hyped''',
    '''No lags found super speed and very good performance nice phone in this budget''',
)
for _sample_review in _sample_reviews:
    clf.predict(vectr.transform([_sample_review]))
| # In[ ]: | |