Learn practical skills, build real-world projects, and advance your career
Created 5 years ago
Converting text to numeric features using sklearn's feature extraction
Ref: https://github.com/justmarkham/pycon-2016-tutorial/blob/master/tutorial.ipynb
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import scikitplot as skp
# Suppress all warnings so they do not clutter the output
import warnings
warnings.filterwarnings("ignore")
Sample dataset
Each element of the list is treated as a separate document.
# Toy corpus of five short strings, one document per list element.
# Note that "Yet" and "yet" in the last two entries differ only in case.
sample_data = [
    "This is test1",
    "This is test2",
    "This is another line with test3",
    "Yet another line with test4",
    "yet again another line with test5",
]
# Bare expression: echoes the list when evaluated as the last line of a
# notebook cell; it has no effect when run as a plain script.
sample_data
['This is test1',
'This is test2',
'This is another line with test3',
'Yet another line with test4',
'yet again another line with test5']