Learn practical skills, build real-world projects, and advance your career
Updated 5 years ago
- Datetime format
1-way or 2-way
jovian.commit()
[jovian] Saving notebook..
[jovian] Updating notebook "9fa0c2bca40b448ca926a6ca5f392e24" on https://jvn.io
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jvn.io/PrajwalPrashanth/9fa0c2bca40b448ca926a6ca5f392e24
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
def rawToDf(file):
    """Parse an exported chat text file into a tidy DataFrame.

    Each entry in the file is expected to start with a stamp shaped like
    ``12/05/20, 9:15 pm - `` (day/month/year with a 2- or 4-digit year and a
    12-hour clock), followed by either ``<user>: <message>`` or a free-form
    group notification.

    Args:
        file: Path of the text file to read (decoded as UTF-8).

    Returns:
        A ``pandas.DataFrame`` with one row per entry and the columns
        ``date_time`` (datetime64), ``users`` (sender name, or the literal
        ``"grp_notif"`` for entries without a ``<user>: `` prefix) and
        ``messages`` (the entry text).

    Raises:
        ValueError: If the stamps match neither the 2-digit-year nor the
            4-digit-year day/month/year format.
    """
    # Stamp that opens every entry, e.g. "12/05/20, 9:15 pm - ".
    stamp_pattern = re.compile(
        r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s[APap][mM]\s-\s'
    )
    # Lazily captures everything up to the first ": " as the sender's name.
    sender_pattern = re.compile(r'([\w\W]+?):\s')

    with open(file, 'r', encoding='utf-8') as raw_data:
        # Messages may span several lines, so flatten the file into a single
        # string (newlines become spaces) before cutting it at the stamps.
        raw_string = ' '.join(raw_data.read().split('\n'))

    # Text before the first stamp is not an entry, hence the [1:]; the two
    # lists therefore always have the same length.
    user_msg = stamp_pattern.split(raw_string)[1:]
    date_time = stamp_pattern.findall(raw_string)

    df = pd.DataFrame({'date_time': date_time, 'user_msg': user_msg})

    # The year may be exported with 2 or 4 digits: try the short form first
    # and fall back to the long one only when parsing genuinely fails.
    try:
        df['date_time'] = pd.to_datetime(df['date_time'], format='%d/%m/%y, %I:%M %p - ')
    except ValueError:
        df['date_time'] = pd.to_datetime(df['date_time'], format='%d/%m/%Y, %I:%M %p - ')

    usernames = []
    msgs = []
    for entry in df['user_msg']:
        # maxsplit=1 cuts only at the first "<user>: " so that a message which
        # itself contains ": " is kept whole instead of being split apart.
        parts = sender_pattern.split(entry, maxsplit=1)
        if len(parts) > 1:  # message typed by a user
            usernames.append(parts[1])
            msgs.append(parts[2])
        else:  # group notification (someone was added, someone left, ...)
            usernames.append("grp_notif")
            msgs.append(parts[0])

    df['users'] = usernames
    df['messages'] = msgs

    # The combined column has been split into 'users' and 'messages'.
    df.drop('user_msg', axis=1, inplace=True)
    return df