Learn practical skills, build real-world projects, and advance your career
Updated 4 years ago
Data Science Tweets
A scraper that collects tweets about data science, running the scraping jobs in parallel.
# Install the twitter_scraper package using pip3
! pip3 install -q twitter_scraper
! pip install -q jovian
from twitter_scraper import get_tweets
from multiprocessing import Pool
import pandas as pd
from ast import literal_eval
from tqdm.notebook import tqdm
from time import sleep
import jovian
# Search terms we want tweets for. Two entries already carry a leading '#';
# the rest are bare words.
keywords = [
    "machinelearning",
    "ML",
    "deeplearning",
    "#artificialintelligence",
    "#NLP",
    "computervision",
    "AI",
    "tensorflow",
    "pytorch",
    "sklearn",
    "pandas",
    "plotly",
    "spacy",
    "fastai",
    "datascience",
    "dataanalysis",
]
# Let's run one iteration to understand how to use this library.
# The result is materialised into a list: the key inspection below and the
# row extraction after it both read from `tweets`, and a one-shot iterator
# (which get_tweets appears to return -- confirm against the library) would
# otherwise lose the first tweet to the inspection step.
tweets = list(get_tweets("#machinelearning", pages=5))

# Let's print the keys available on a tweet (skipped when nothing came back).
if tweets:
    print('Keys:', list(tweets[0].keys()), '\n')

# Extract the relevant fields for this one keyword. Rows are collected first
# and the frame is built once; DataFrame.append copied the frame on every call
# and no longer exists in pandas 2.x.
columns = ['text', 'isRetweet', 'replies', 'retweets', 'likes']
rows = [{column: tweet[column] for column in columns} for tweet in tweets]
tweets_df = pd.DataFrame(rows, columns=columns)
tweets_df.head()
Keys: ['tweetId', 'isRetweet', 'time', 'text', 'replies', 'retweets', 'likes', 'entries']