Learn practical skills, build real-world projects, and advance your career

Data Science Tweets

alt

A scraper that collects tweets about data science topics in parallel.

# Install the twitter_scraper package using pip3
! pip3 install -q twitter_scraper 
! pip install -q jovian
from twitter_scraper import get_tweets
from multiprocessing import Pool
import pandas as pd
from ast import literal_eval
from tqdm.notebook import tqdm
from time import sleep
import jovian
# Hashtags/keywords to scrape tweets for.
# NOTE(review): entries mix bare terms with '#'-prefixed ones ('#artificialintelligence',
# '#NLP') — confirm whether get_tweets treats both forms the same.
keywords = [
    'machinelearning',
    'ML',
    'deeplearning',
    '#artificialintelligence',
    '#NLP',
    'computervision',
    'AI',
    'tensorflow',
    'pytorch',
    'sklearn',
    'pandas',
    'plotly',
    'spacy',
    'fastai',
    'datascience',
    'dataanalysis',
]
# Run one iteration to understand how to use the library.
# get_tweets yields lazily; materialize it up front — otherwise the
# key-inspection print below consumes the first item and the extraction
# step would silently skip that tweet.
tweets = list(get_tweets("#machinelearning", pages=5))

# Print the keys available on a single tweet dict (guard against an
# empty result so indexing cannot raise).
if tweets:
    print('Keys:', list(tweets[0].keys()), '\n')

# Extract the relevant fields from every tweet into a DataFrame.
# Build a list of records and construct the frame once: calling
# DataFrame.append per row is quadratic and the method was removed
# in pandas 2.0.
records = [{'text': tweet['text'],
            'isRetweet': tweet['isRetweet'],
            'replies': tweet['replies'],
            'retweets': tweet['retweets'],
            'likes': tweet['likes']}
           for tweet in tweets]
tweets_df = pd.DataFrame(
    records,
    columns=['text', 'isRetweet', 'replies', 'retweets', 'likes'],
)
tweets_df.head()
Keys: ['tweetId', 'isRetweet', 'time', 'text', 'replies', 'retweets', 'likes', 'entries']