Sign In
In [ ]:
import re
import jovian
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS
import emoji
from collections import Counter

Data Preprocessing

def rawToDf(file):
    """Parse an exported chat text file into a DataFrame of messages.

    Every message in the export is expected to start with a 12-hour
    timestamp header such as ``10/20/19, 10:24 pm - `` or
    ``20/10/2019, 10:24 pm - ``.

    Parameters
    ----------
    file : str or path-like
        Path to the exported chat text file (read as UTF-8).

    Returns
    -------
    pandas.DataFrame
        One row per message with the columns ``date_time`` (parsed
        timestamps), ``user`` and ``msg``. Entries without a leading
        ``{user_name}: `` prefix (group notifications) get the user
        ``"grp_notif"`` and keep their full text in ``msg``.

    Raises
    ------
    ValueError
        If the timestamps match neither of the two supported formats.
    """
    # Header that precedes every message, e.g. "10/20/19, 10:24 pm - ".
    date_time_pattern = re.compile(
        r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s[APap][mM]\s-\s'
    )
    # Lazy match up to the first "{user_name}: " prefix of an entry.
    user_pattern = re.compile(r'([\w\W]+?):\s')

    with open(file, 'r', encoding='utf-8') as raw_data:
        # Join the lines into one string because a single message can span
        # several lines.
        raw_string = ' '.join(raw_data.read().split('\n'))

    # Text between timestamps (user name + message); the chunk before the
    # first timestamp is dropped.
    user_msg = date_time_pattern.split(raw_string)[1:]
    # The timestamps themselves, in the same order.
    date_time = date_time_pattern.findall(raw_string)
    df = pd.DataFrame({'date_time': date_time, 'user_msg': user_msg})

    # Convert the timestamp strings to datetimes. The format has to describe
    # the whole matched header, including the trailing " - ".
    try:
        # e.g. "10/20/19, 10:24 pm - "
        df['date_time'] = pd.to_datetime(
            df['date_time'], format='%m/%d/%y, %I:%M %p - '
        )
    except ValueError:
        # e.g. "20/10/2019, 10:24 pm - "
        df['date_time'] = pd.to_datetime(
            df['date_time'], format='%d/%m/%Y, %I:%M %p - '
        )

    # Split each entry into user and message.
    usernames = []
    msgs = []
    for entry in df['user_msg']:
        # maxsplit=1 so that a ": " inside the message body does not cut
        # the message short.
        parts = user_pattern.split(entry, maxsplit=1)
        if parts[1:]:  # user typed messages
            usernames.append(parts[1])
            msgs.append(parts[2])
        else:  # other notifications in the group (someone was added, left, ...)
            usernames.append('grp_notif')
            msgs.append(parts[0])

    # Creating the new columns.
    df['user'] = usernames
    df['msg'] = msgs

    # Dropping the old combined column.
    df = df.drop(columns='user_msg')
    return df
In [ ]:
# Name of the notebook author. Presumably this is compared against the sender
# names in the chat to pick out the author's own messages, so it would need to
# match the export exactly -- TODO confirm against the cells that use it.
me = "Prajwal Prashanth"


Number of images and group notifications, and dropping them

Q 1)

Who is the most active member of the group? Who is the least active? Is it the same on weekdays and weekends?

Q 2)

Count of all the emoticons that I have used to date.

Q 3)

What can my activity say about my sleep cycle?

Q 4)

What is the difference in Weekend vs Weekday usage pattern?

How many words do I type on average on weekday vs weekend?

Q 5)

Most Usage - Time of Day

Q 6)

In any group, do I have any inclination towards responding to someone?

Q 7)

Which are the most common words?

In [ ]:
# Collect every word of every message, lower-cased, in message order.
lowered_words = [
    token.lower()
    for message in df.msg.values
    for token in str(message).split()
]
# One space-separated string for the word cloud. The leading space and the
# space after the last word match what word-by-word concatenation starting
# from ' ' produces.
comment_words = ' ' + ''.join(word + ' ' for word in lowered_words)

# set.update() returns None, so build a new set with union() instead; this
# also leaves the library-wide STOPWORDS set unmodified.
stopwords = STOPWORDS.union([
    'lo', 'ge', 'Lo', 'illa', 'yea', 'ella', 'en', 'na', 'En', 'yeah',
    'alli', 'ide', 'okay', 'ok', 'will',
])

wordcloud = WordCloud(
    width=800,
    height=800,
    background_color='black',
    stopwords=stopwords,
    min_font_size=10,
).generate(comment_words)

In [ ]:
# Draw the generated word cloud on a square figure without axes; the figure
# was previously created but nothing was rendered onto it.
plt.figure(figsize=(8, 8), facecolor=None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad=0)
plt.show()
In [ ]:

Know What They Know (at least a little)


  • One-way or two-way: check the response time between two people
In [ ]: