Learn practical skills, build real-world projects, and advance your career
Updated 3 years ago
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Walk the input directory tree and print the full path of every file in it.
# The directory list yielded by os.walk is not needed, hence the `_`.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
pip install numpy pandas matplotlib seaborn wordcloud emoji jovian --upgrade
Collecting numpy
Downloading numpy-1.19.2-cp37-cp37m-manylinux2010_x86_64.whl (14.5 MB)
|████████████████████████████████| 14.5 MB 6.3 MB/s eta 0:00:01
Requirement already up-to-date: pandas in /opt/conda/lib/python3.7/site-packages (1.1.3)
Collecting matplotlib
Downloading matplotlib-3.3.2-cp37-cp37m-manylinux1_x86_64.whl (11.6 MB)
|████████████████████████████████| 11.6 MB 11.0 MB/s eta 0:00:01
Collecting seaborn
Downloading seaborn-0.11.0-py3-none-any.whl (283 kB)
|████████████████████████████████| 283 kB 18.2 MB/s eta 0:00:01
Requirement already up-to-date: wordcloud in /opt/conda/lib/python3.7/site-packages (1.8.0)
Requirement already up-to-date: emoji in /opt/conda/lib/python3.7/site-packages (0.6.0)
Collecting jovian
Downloading jovian-0.2.21-py2.py3-none-any.whl (65 kB)
|████████████████████████████████| 65 kB 2.4 MB/s eta 0:00:01
Requirement already satisfied, skipping upgrade: pytz>=2017.2 in /opt/conda/lib/python3.7/site-packages (from pandas) (2019.3)
Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in /opt/conda/lib/python3.7/site-packages (from pandas) (2.8.1)
Requirement already satisfied, skipping upgrade: cycler>=0.10 in /opt/conda/lib/python3.7/site-packages (from matplotlib) (0.10.0)
Requirement already satisfied, skipping upgrade: pillow>=6.2.0 in /opt/conda/lib/python3.7/site-packages (from matplotlib) (8.0.0)
Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in /opt/conda/lib/python3.7/site-packages (from matplotlib) (1.2.0)
Requirement already satisfied, skipping upgrade: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/conda/lib/python3.7/site-packages (from matplotlib) (2.4.7)
Requirement already satisfied, skipping upgrade: certifi>=2020.06.20 in /opt/conda/lib/python3.7/site-packages (from matplotlib) (2020.6.20)
Requirement already satisfied, skipping upgrade: scipy>=1.0 in /opt/conda/lib/python3.7/site-packages (from seaborn) (1.4.1)
Collecting uuid
Downloading uuid-1.30.tar.gz (5.8 kB)
Requirement already satisfied, skipping upgrade: requests in /opt/conda/lib/python3.7/site-packages (from jovian) (2.23.0)
Requirement already satisfied, skipping upgrade: click in /opt/conda/lib/python3.7/site-packages (from jovian) (7.1.1)
Requirement already satisfied, skipping upgrade: pyyaml in /opt/conda/lib/python3.7/site-packages (from jovian) (5.3.1)
Requirement already satisfied, skipping upgrade: six>=1.5 in /opt/conda/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas) (1.14.0)
Requirement already satisfied, skipping upgrade: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->jovian) (1.24.3)
Requirement already satisfied, skipping upgrade: chardet<4,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->jovian) (3.0.4)
Requirement already satisfied, skipping upgrade: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->jovian) (2.9)
Building wheels for collected packages: uuid
Building wheel for uuid (setup.py) ... done
Created wheel for uuid: filename=uuid-1.30-py3-none-any.whl size=6500 sha256=c1e548b91355b486f42939d4e0cee77207d2669aec726a73c376074fff9dc6de
Stored in directory: /root/.cache/pip/wheels/2a/ea/87/dd57f1ecb4f0752f3e1dbf958ebf8b36d920d190425bcdc24d
Successfully built uuid
Installing collected packages: numpy, matplotlib, seaborn, uuid, jovian
Attempting uninstall: numpy
Found existing installation: numpy 1.18.5
Uninstalling numpy-1.18.5:
Successfully uninstalled numpy-1.18.5
Attempting uninstall: matplotlib
Found existing installation: matplotlib 3.2.1
Uninstalling matplotlib-3.2.1:
Successfully uninstalled matplotlib-3.2.1
Attempting uninstall: seaborn
Found existing installation: seaborn 0.10.0
Uninstalling seaborn-0.10.0:
Successfully uninstalled seaborn-0.10.0
ERROR: After October 2020 you may experience errors when installing or updating packages. This is because pip will change the way that it resolves dependency conflicts.
We recommend you use --use-feature=2020-resolver to test your packages with the new resolver before it becomes the default.
tensorflow 2.3.1 requires numpy<1.19.0,>=1.16.0, but you'll have numpy 1.19.2 which is incompatible.
kmeans-smote 0.1.2 requires imbalanced-learn<0.5,>=0.4.0, but you'll have imbalanced-learn 0.7.0 which is incompatible.
kmeans-smote 0.1.2 requires numpy<1.16,>=1.13, but you'll have numpy 1.19.2 which is incompatible.
kmeans-smote 0.1.2 requires scikit-learn<0.21,>=0.19.0, but you'll have scikit-learn 0.23.2 which is incompatible.
dask-xgboost 0.1.11 requires xgboost<=0.90, but you'll have xgboost 1.2.1 which is incompatible.
bokeh 2.2.2 requires tornado>=5.1, but you'll have tornado 5.0.2 which is incompatible.
Successfully installed jovian-0.2.21 matplotlib-3.3.2 numpy-1.19.2 seaborn-0.11.0 uuid-1.30
Note: you may need to restart the kernel to use updated packages.
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
import emoji
from collections import Counter
%matplotlib inline
import seaborn as sns
#import os
#pip install jovian --upgrade
import jovian
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-47-8babc128ff95> in <module>
----> 1 import seaborn as sns
2 #import os
3 #pip install jovian --upgrade
4 import jovian
/opt/conda/lib/python3.7/site-packages/seaborn/__init__.py in <module>
1 # Import seaborn objects
----> 2 from .rcmod import * # noqa: F401,F403
3 from .utils import * # noqa: F401,F403
4 from .palettes import * # noqa: F401,F403
5 from .relational import * # noqa: F401,F403
/opt/conda/lib/python3.7/site-packages/seaborn/rcmod.py in <module>
5 import matplotlib as mpl
6 from cycler import cycler
----> 7 from . import palettes
8
9
/opt/conda/lib/python3.7/site-packages/seaborn/palettes.py in <module>
7 from .external import husl
8
----> 9 from .utils import desaturate, get_color_cycle
10 from .colors import xkcd_rgb, crayons
11
/opt/conda/lib/python3.7/site-packages/seaborn/utils.py in <module>
8
9 import numpy as np
---> 10 from scipy import stats
11 import pandas as pd
12 import matplotlib as mpl
/opt/conda/lib/python3.7/site-packages/scipy/stats/__init__.py in <module>
382 from __future__ import division, print_function, absolute_import
383
--> 384 from .stats import *
385 from .distributions import *
386 from .morestats import *
/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in <module>
177
178 from scipy._lib.six import callable, string_types
--> 179 from scipy.spatial.distance import cdist
180 from scipy.ndimage import measurements
181 from scipy._lib._version import NumpyVersion
/opt/conda/lib/python3.7/site-packages/scipy/spatial/__init__.py in <module>
95 from __future__ import division, print_function, absolute_import
96
---> 97 from .kdtree import *
98 from .ckdtree import *
99 from .qhull import *
/opt/conda/lib/python3.7/site-packages/scipy/spatial/kdtree.py in <module>
6 import numpy as np
7 from heapq import heappush, heappop
----> 8 import scipy.sparse
9
10 __all__ = ['minkowski_distance_p', 'minkowski_distance',
/opt/conda/lib/python3.7/site-packages/scipy/sparse/__init__.py in <module>
227 import warnings as _warnings
228
--> 229 from .base import *
230 from .csr import *
231 from .csc import *
/opt/conda/lib/python3.7/site-packages/scipy/sparse/base.py in <module>
5
6 from scipy._lib.six import xrange
----> 7 from scipy._lib._numpy_compat import broadcast_to
8 from .sputils import (isdense, isscalarlike, isintlike,
9 get_sum_dtype, validateaxis, check_reshape_kwargs,
/opt/conda/lib/python3.7/site-packages/scipy/_lib/_numpy_compat.py in <module>
14
15 if NumpyVersion(np.__version__) > '1.7.0.dev':
---> 16 _assert_warns = np.testing.assert_warns
17 else:
18 def _assert_warns(warning_class, func, *args, **kw):
/opt/conda/lib/python3.7/site-packages/numpy/__init__.py in __getattr__(attr)
211 from .testing import Tester
212 return Tester
--> 213 else:
214 raise AttributeError("module {!r} has no attribute "
215 "{!r}".format(__name__, attr))
/opt/conda/lib/python3.7/site-packages/numpy/testing/__init__.py in <module>
8 from unittest import TestCase
9
---> 10 from ._private.utils import *
11 from ._private import decorators as dec
12 from ._private.nosetester import (
/opt/conda/lib/python3.7/site-packages/numpy/testing/_private/utils.py in <module>
50 IS_PYPY = platform.python_implementation() == 'PyPy'
51 HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None
---> 52 HAS_LAPACK64 = numpy.linalg.lapack_lite._ilp64
53
54
AttributeError: module 'numpy.linalg.lapack_lite' has no attribute '_ilp64'
def rawToDf(file, key):
    """Parse an exported chat text file into a DataFrame, one row per message.

    The whole file is read as one string (newlines become spaces, so a
    multi-line message stays in a single chunk), split at every date-time
    stamp matching the layout selected by ``key``, and each chunk is then
    separated into the sender name and the message text.

    Args:
        file: Path of the exported chat text file (read as UTF-8).
        key: Time-stamp layout used by the export: ``'12hr'`` or ``'24hr'``.
            ``'custom'`` is an empty placeholder in both tables below, meant
            to be filled in by hand for other layouts.

    Returns:
        A DataFrame with the columns ``date_time`` (parsed with
        ``pd.to_datetime``), ``user`` and ``msg``. Chunks that have no
        ``"<name>: "`` prefix (e.g. someone was added or left) get the user
        ``"grp_notif"`` and keep their full text as ``msg``.

    Raises:
        KeyError: If ``key`` is not one of the table keys.
        ValueError: Propagated from ``pd.to_datetime`` when a matched stamp
            does not fit the corresponding ``strptime`` format.
    """
    # Regexes locating the date-time stamp that starts every message.
    # Raw strings keep the backslashes intact for the regex engine.
    split_formats = {
        '12hr': r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s[APap][mM]\s-\s',
        '24hr': r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s',
        'custom': r'',
    }
    # strptime formats used to parse the stamps found by the regexes above.
    datetime_formats = {
        '12hr': '%d/%m/%Y, %I:%M %p - ',
        '24hr': '%d/%m/%Y, %H:%M - ',
        'custom': '',
    }

    stamp_pattern = re.compile(split_formats[key])

    # Explicit UTF-8: chats routinely contain emoji and non-ASCII text, which
    # the platform default encoding may not be able to decode.
    with open(file, 'r', encoding='utf-8') as raw_data:
        # Join the lines into one string because messages can span lines.
        raw_string = ' '.join(raw_data.read().split('\n'))

    # Text between consecutive stamps; element 0 is whatever precedes the
    # first stamp and is discarded.
    user_msg = stamp_pattern.split(raw_string)[1:]
    # The stamps themselves, in the same order as the chunks above.
    date_time = stamp_pattern.findall(raw_string)

    df = pd.DataFrame({'date_time': date_time, 'user_msg': user_msg})

    # Convert the stamp strings to datetimes; the format describes the whole
    # matched string, including the trailing " - " separator.
    df['date_time'] = pd.to_datetime(df['date_time'], format=datetime_formats[key])

    # Lazy match up to the first "<name>: " prefix.
    sender_pattern = re.compile(r'([\w\W]+?):\s')

    usernames = []
    msgs = []
    for chunk in df['user_msg']:
        # maxsplit=1 splits only at the first "name: " prefix, so a message
        # that itself contains ": " is kept whole instead of being cut off.
        parts = sender_pattern.split(chunk, maxsplit=1)
        if len(parts) > 1:  # message typed by a user
            usernames.append(parts[1])
            msgs.append(parts[2])
        else:  # group notification (someone was added, someone left, ...)
            usernames.append("grp_notif")
            msgs.append(parts[0])

    df['user'] = usernames
    df['msg'] = msgs

    # The combined column is no longer needed once it has been split.
    df.drop('user_msg', axis=1, inplace=True)

    return df