# #AYODHYAVERDICT tweets dataset (Nov 8, 2019 to Nov 17, 2019)

TweetScraper is built on Scrapy, PyMongo, mysql-connector, and configparser, and works without Twitter's APIs. After running the command below from the project's root folder, I scraped all the #AYODHYAVERDICT tweets from Nov 8 to Nov 17, 2019. The resulting dataset contains both the tweet content and the user information for each tweet.

## scrapy crawl TweetScraper -a query="(#AYODHYAVERDICT) lang:en until:2019-11-17 since:2019-11-08"
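
TweetScraper does not emit a CSV directly; with its default settings it saves each scraped tweet as a JSON file (by default under Data/tweet/). The snippet below is a minimal sketch of how those files could be flattened into the Tweets.csv loaded later in this notebook; the Data/tweet/ path and the one-JSON-object-per-file layout are assumptions based on TweetScraper's defaults, not something shown here.

# Minimal sketch: flatten TweetScraper's per-tweet JSON files into one CSV.
# Assumes the default output directory Data/tweet/, one JSON object per file.
import glob
import json

import pandas as pd

records = []
for path in glob.glob("Data/tweet/*"):
    with open(path, encoding="utf-8") as f:
        records.append(json.load(f))  # each file holds one tweet as a JSON object

pd.DataFrame(records).to_csv("Tweets.csv", index=False)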
import pandas as pd       # necessary libraries are imported for the different analyses below
import numpy as np

# Options for pandas
pd.options.display.max_columns = None
pd.options.display.max_rows = None

pd.options.display.max_colwidth = None  # show full column contents (None replaces the deprecated -1)

# Display all cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython import get_ipython
ipython = get_ipython()

# autoreload extension
if 'autoreload' not in ipython.extension_manager.loaded:
    %load_ext autoreload

%autoreload 2

# Visualizations
import matplotlib.pyplot as plt
import seaborn as sns
import spacy
import string

import re
# nltk library
import nltk
from nltk import FreqDist, download
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, SnowballStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize, wordpunct_tokenize

from collections import Counter
from IPython.display import clear_output

# sklearn libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import normalize
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

import datetime as dt
from datetime import datetime

from matplotlib import rcParams
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

lemmatizer = WordNetLemmatizer()  # lemmatizer instance reused throughout the notebook

# gensim library
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS

ps = PorterStemmer()  # Porter stemmer instance for stemming tokens
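
As a quick illustration of how these pieces fit together, here is a minimal tweet-cleaning sketch using the tools imported above. The regex rules, the English stopword set, and the sample tweet are illustrative assumptions, not the notebook's final preprocessing pipeline.

# Minimal tweet-cleaning sketch with the imported tools.
# Assumes nltk's 'punkt', 'stopwords', and 'wordnet' data are downloaded.
stop_words = set(stopwords.words("english"))

def clean_tweet(text):
    text = text.lower()
    text = re.sub(r"http\S+|@\w+|#", " ", text)  # drop URLs, mentions, '#'
    tokens = word_tokenize(text)
    tokens = [t for t in tokens if t.isalpha() and t not in stop_words]
    return [lemmatizer.lemmatize(t) for t in tokens]

clean_tweet("RT @user: #AYODHYAVERDICT Peace wins today! https://t.co/xyz")
# ['rt', 'ayodhyaverdict', 'peace', 'win', 'today']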
import os
%pwd
'C:\\Users\\Sonali MJ\\Documents\\Python Scripts\\Social media analytics'
df = pd.read_csv("Tweets.csv")  # load the scraped tweets CSV into a DataFrame
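
A quick sanity check of the loaded data, plus a first look at VADER scoring, is sketched below. Since ast_node_interactivity is set to 'all' above, every expression in the cell is displayed. The column name "text" for the tweet body is an assumption about the scraped schema, not something confirmed at this point in the notebook.

# Sanity-check the loaded tweets (hedged sketch).
df.shape    # number of tweets and columns scraped
df.columns  # available tweet/user fields
df.head()   # first few rows

# First look at VADER sentiment scoring; the "text" column name is assumed.
analyzer = SentimentIntensityAnalyzer()
analyzer.polarity_scores(str(df["text"].iloc[0]))
# returns a dict like {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...}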