Learn practical skills, build real-world projects, and advance your career
## import statements ##
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
#import missingno as msno
%matplotlib inline
# Read the training tweets from the CSV file into a DataFrame, then print
# its structural summary (index range, columns, non-null counts, dtypes).
train_data = pd.read_csv(filepath_or_buffer='train_tweets.csv')
train_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31962 entries, 0 to 31961
Data columns (total 3 columns):
id       31962 non-null int64
label    31962 non-null int64
tweet    31962 non-null object
dtypes: int64(2), object(1)
memory usage: 749.2+ KB
# Tally how many rows carry each distinct value in the 'label' column.
label_column = train_data['label']
label_column.value_counts()
0    29720
1     2242
Name: label, dtype: int64
Cleaning the data