# Learn practical skills, build real-world projects, and advance your career
# Sentiment classification of short text messages: bag-of-words features
# (CountVectorizer) fed to a multinomial Naive Bayes classifier, evaluated on
# a held-out test split.
import pandas as pd
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Fixed seed so the train/test split, and therefore every metric printed
# below, is identical from run to run.
RANDOM_STATE = 42

# Column names are supplied explicitly via names=.
msg = pd.read_csv('datasets/6.csv', names=['message', 'label'])
print('The dimensions of the dataset', msg.shape)

# Encode the textual labels as integers: 'pos' -> 1, 'neg' -> 0.
msg['labelnum'] = msg['label'].map({'pos': 1, 'neg': 0})

# Series.map yields NaN for any value that is not a key of the mapping.
# Fail here with a clear message instead of letting NaN targets reach the
# classifier or the metric functions.
unmapped = msg.loc[msg['labelnum'].isna(), 'label']
if not unmapped.empty:
    raise ValueError(
        "Unrecognised label value(s) in datasets/6.csv: "
        f"{unmapped.unique().tolist()!r}; expected 'pos' or 'neg'"
    )

X = msg['message']
y = msg['labelnum']
print(X)
print(y)

# Stratify on the label so both classes appear in the test fold; otherwise
# recall/precision can be undefined when the dataset is small.
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, stratify=y, random_state=RANDOM_STATE
)
print(xtest.shape)
print(xtrain.shape)
print(ytest.shape)
print(ytrain.shape)

# Learn the vocabulary from the training messages only, then reuse it to
# encode the test messages, so no test-set information leaks into training.
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)

clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

print('Accuracy metrics')
print('Accuracy of the classifer is', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall and Precison ')
print(metrics.recall_score(ytest, predicted))
print(metrics.precision_score(ytest, predicted))
# Sample output from one run (which rows land in each split, and hence the
# metric values, depend on the train/test split):
#
# The dimensions of the dataset (19, 2)
# 0     I love this sandwich
# 1     This is an amazing place
# 2     I feel very good about these beers
# 3     This is my best work
# 4     What an awesome view
# 5     I do not like this restaurant
# 6     I am tired of this stuff
# 7     I can't deal with this
# 8     He is my sworn enemy
# 9     My boss is horrible
# 10    This is an awesome place
# 11    I do not like the taste of this juice
# 12    I love to dance
# 13    I am sick and tired of this place
# 14    What a great holiday
# 15    That is a bad locality to stay
# 16    We will have good fun tomorrow
# 17    I went to my enemy's house today
# 18    i like to eat food
# Name: message, dtype: object
# 0     1
# 1     1
# 2     1
# 3     1
# 4     1
# 5     0
# 6     0
# 7     0
# 8     0
# 9     0
# 10    1
# 11    0
# 12    1
# 13    0
# 14    1
# 15    0
# 16    1
# 17    0
# 18    1
# Name: labelnum, dtype: int64
# (5,)
# (14,)
# (5,)
# (14,)
# Accuracy metrics
# Accuracy of the classifer is 0.8
# Confusion matrix
# [[2 1]
#  [0 2]]
# Recall and Precison
# 1.0
# 0.6666666666666666