Jovian
⭐️
Sign In
In [17]:
# Show which directory the notebook kernel is currently working in
import os

current_dir = os.getcwd()
print(current_dir)
E:\PYTHON\jupyternotebook
In [3]:
# Setting the working directory, then printing it to confirm the change.
# NOTE(review): `path` is a hard-coded absolute Windows location, so this cell
# only runs on a machine that has E:\PYTHON. It also relies on `os` having
# been imported by an earlier cell.
path = 'E:\\PYTHON'
os.chdir(path)
print(os.getcwd())

E:\PYTHON\jupyternotebook
In [7]:
# Bind two integers and a string, then print the sum of the two integers
a = 5
b = 4
c = 'Goni'
print(a + b)
9
In [10]:
# Print a value next to its runtime type
a = 5
value_type = type(a)
print(a, "is  a type of", value_type)
5 is a type of <class 'int'>
In [16]:
# Find the sum of all the multiples of 3 or 5 below 1000.
# The accumulator is named `total` so that the built-in sum() is not
# shadowed for the rest of the kernel session.
LIMIT = 1000  # exclusive upper bound of the numbers considered

total = 0
for i in range(1, LIMIT):
    if i % 3 == 0 or i % 5 == 0:
        total += i
print(total)
233168
In [ ]:
# Print the first ten Fibonacci terms: 1, 2, 3, 5, 8, 13, 21, 34, 55, 89.
# This cell only lists the terms; it does not sum the even-valued ones.
N_TERMS = 10  # number of terms to print

prev, fibo = 0, 1
for _ in range(N_TERMS):
    # Advance one step: the new term is the sum of the two previous ones
    prev, fibo = fibo, fibo + prev
    print(fibo)


In [ ]:
# By considering the terms in the Fibonacci sequence whose values do not
# exceed four million, find the sum of the even-valued terms.
MAX_TERM = 4000000  # inclusive upper bound on the terms considered

# Named `even_sum` rather than `sum` so the built-in sum() stays usable.
even_sum = 0
x, y = 1, 2
while y <= MAX_TERM:
    if y % 2 == 0:
        even_sum += y
    # Slide the window forward: (x, y) -> (y, x + y)
    x, y = y, x + y

print(even_sum)
In [5]:
# Python sets: build one from a literal and inspect its type
a = {2, 4, 5, 7, 9}
type(a)

Out[5]:
set
In [15]:
# Load the insurance sample from the working directory and display it.
# Uses the `pd` alias, matching every other pandas cell in this notebook.
import pandas as pd

mydata = pd.read_csv('Ins.csv')
mydata
Out[15]:
In [16]:
import pandas as pd

# Read the insurance sample and report the number of rows in the DataFrame
mydata = pd.read_csv('Ins.csv')
row_count = len(mydata)
print(row_count)

8
In [17]:
# Check the dimensions of the DataFrame as a (rows, columns) tuple
mydata.shape

Out[17]:
(8, 6)
In [33]:
# List the column labels of the insurance DataFrame
print(mydata.columns)
Index(['policyID', 'statecode', 'county', 'eq_site_limit', 'hu_site_limit', 'fl_site_limit'], dtype='object')
In [ ]:
 
In [19]:
# Preview the first rows of the DataFrame (head() defaults to five rows)
mydata.head()
Out[19]:
In [18]:
# Print summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
print(mydata.describe())
policyID eq_site_limit hu_site_limit fl_site_limit count 8.000000 8.000000e+00 8.000000e+00 8.000000e+00 mean 410711.875000 2.925701e+05 2.806175e+06 2.925701e+05 std 318050.385373 4.562503e+05 6.659353e+06 4.562503e+05 min 119736.000000 0.000000e+00 7.952076e+04 0.000000e+00 25% 198303.250000 0.000000e+00 2.383922e+05 0.000000e+00 50% 278615.500000 9.536220e+04 4.137300e+05 9.536220e+04 75% 532389.250000 3.711150e+05 7.168708e+05 3.711150e+05 max 995932.000000 1.322376e+06 1.926000e+07 1.322376e+06
In [57]:
# Order the policies from the largest to the smallest 'eq_site_limit'
test_data = mydata.sort_values('eq_site_limit', ascending=False)
# Preview the top six rows of the sorted frame
test_data.head(n=6)

Out[57]:
In [55]:

# Group the rows by 'statecode' and show, for each group, the first
# non-null value of every remaining column.
kk = mydata.groupby(['statecode']) 
kk.first()
Out[55]:
In [63]:
# Second and third entries (by position) of the sorted 'statecode' column
test_data['statecode'].iloc[1:3]
Out[63]:
0    FL
7    FL
Name: statecode, dtype: object
In [60]:
# Count the rows whose statecode is 'FL', using a single .loc selection
is_fl = test_data['statecode'] == 'FL'
test_data.loc[is_fl, 'statecode'].count()
Out[60]:
8
In [65]:
# Show the policyID column in the sorted row order of test_data
test_data['policyID']
Out[65]:
1    448094
0    119736
7    223488
2    206893
3    333743
4    172534
5    785275
6    995932
Name: policyID, dtype: int64
In [66]:
# Summarise the index range, per-column non-null counts, dtypes and memory usage
mydata.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 8 entries, 0 to 7 Data columns (total 6 columns): policyID 8 non-null int64 statecode 8 non-null object county 8 non-null object eq_site_limit 8 non-null float64 hu_site_limit 8 non-null float64 fl_site_limit 8 non-null float64 dtypes: float64(3), int64(1), object(2) memory usage: 464.0+ bytes
In [84]:
import pandas as pd
# Load the two CSV files (read from the current working directory) that are
# joined on the 'network' column further down.
ABC_consumption_data = pd.read_csv('ABC_company_phone_data.csv')
TRAI_data = pd.read_csv('TRAI_data.csv')

In [85]:
# Display the ABC company phone consumption DataFrame
ABC_consumption_data
Out[85]:
In [72]:
# Display the TRAI reference DataFrame
TRAI_data
Out[72]:
In [74]:
# Left-join the TRAI reference data onto the consumption records by network,
# keeping every consumption row even when no TRAI match exists.
Merged_data = pd.merge(
    left=ABC_consumption_data,
    right=TRAI_data,
    how='left',
    on='network',
)

merged_row_count = len(Merged_data)
print(merged_row_count)
388
In [86]:
# Checking the number of records (the statement that produced the output below is missing from this cell)

20
In [90]:
# Report the (rows, columns) shape and the row count of the consumption data.
# A bare expression is only displayed when it is the last statement of a
# cell, so both values are printed explicitly.
print(ABC_consumption_data.shape)
print(len(ABC_consumption_data))
388
In [96]:
# Keep the summary statistics of the merged frame in a variable, then print them
merged_data_stat = Merged_data.describe()
print(merged_data_stat)
duration Total consumption count 388.000000 346.000000 mean 353.909794 425144.508671 std 2357.715224 169151.700841 min 1.000000 200000.000000 25% 14.000000 200000.000000 50% 69.000000 500000.000000 75% 245.250000 600000.000000 max 45000.000000 600000.000000
In [95]:
# Count how many merged rows belong to each network
print(Merged_data['network'].value_counts())
Three 128 Tesco 71 Vodafone 66 Meteor 54 landline 42 Airtel 27 Name: network, dtype: int64
In [97]:
# Preview the first rows of the merged DataFrame
Merged_data.head()
Out[97]:
# Summary statistics of the 'duration' column at a custom set of percentiles
# (each percentile must lie between 0 and 1).
pd.DataFrame(Merged_data['duration']).describe(percentiles=(1, 0.99, 0.9, 0.75, 0.5, 0.3, 0.1, 0.01))
In [100]:
# Pull the 'duration' column out into its own single-column DataFrame
duration_col_df = Merged_data['duration'].to_frame()
duration_col_df
Out[100]:
In [19]:
# Mean and inter-quartile-range (IQR) based lower/upper limits for 'duration'.
# Requires `duration_col_df` to exist in the kernel; on a fresh kernel the
# cell that builds it must be run first, otherwise this raises NameError.
IQR_MULTIPLIER = 10  # how many IQRs beyond the quartiles the limits sit

# Work on the column as a Series so mean() and quantile() return plain
# scalars; calling int() on a one-element Series is deprecated in recent
# pandas versions.
duration_values = duration_col_df['duration']

duration_mean = int(duration_values.mean())
IQR_duration_P75 = duration_values.quantile(q=0.75)
IQR_duration_P25 = duration_values.quantile(q=0.25)
IQR_duration = IQR_duration_P75 - IQR_duration_P25

# Limits are truncated toward zero to whole numbers by int()
IQR_LL = int(IQR_duration_P25 - IQR_MULTIPLIER * IQR_duration)
IQR_UL = int(IQR_duration_P75 + IQR_MULTIPLIER * IQR_duration)

--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-19-1d91939ae41f> in <module> ----> 1 duration_mean = int(duration_col_df.mean()) 2 IQR_duration_P75 = duration_col_df.quantile(q=0.75) 3 IQR_duration_P25 = duration_col_df.quantile(q=0.25) 4 IQR_duration = IQR_duration_P75 - IQR_duration_P25 5 NameError: name 'duration_col_df' is not defined
In [7]:
# Materialise the integers 0 through 3 as a list (the start defaults to 0)
list(range(4))
Out[7]:
[0, 1, 2, 3]