rustydigg918/untitled9

a year ago
In [17]:
``````#checking the current directory
import os

# Report the directory the notebook kernel is currently running in.
current_directory = os.getcwd()
print(current_directory)
``````
```E:\PYTHON\jupyternotebook ```
In [3]:
``````#Setting the working directory
# Switch the kernel's working directory, then echo it back to confirm the change.
# NOTE(review): absolute, machine-specific path — os.chdir raises if the
# directory does not exist, so this cell only runs on the author's machine.
path = 'E:\\PYTHON'
os.chdir(path)
new_working_directory = os.getcwd()
print(new_working_directory)

``````
```E:\PYTHON\jupyternotebook ```
In [7]:
``````a,b,c = 5,4,'Goni'
print(a+b)
``````
```9 ```
In [10]:
``````a=5
print(a, "is  a type of", type(a))``````
```5 is a type of <class 'int'> ```
In [16]:
``````# Find the sum of all the multiples of 3 or 5 below 1000

# Add up every integer from 1 to 999 that is divisible by 3 or by 5.
# The result gets its own name so that the builtin `sum` is not overwritten
# for the rest of the notebook session.
multiples_total = sum(n for n in range(1, 1000) if n % 3 == 0 or n % 5 == 0)
print(multiples_total)

``````
```233168 ```
In [ ]:
``````# find the sum of the even-valued terms
# 1, 2, 3, 5, 8, 13, 21, 34, 55, 89

# Generate Fibonacci terms iteratively: `fibo` holds the current term and
# `prev` the term before it. No even-valued sum is computed in this cell.
# NOTE(review): this export has lost the cell's indentation, so it is unclear
# whether print(fibo) runs on every iteration or only once after the loop —
# confirm against the original notebook.
fibo = 1
prev = 0
for i in range(11):
# next term = current term + previous term
fibo = fibo + prev
# fibo was just updated, so fibo - prev recovers the old current term
prev= fibo - prev
print(fibo)

``````
In [ ]:
``````# By considering the terms in the Fibonacci sequence whose values do not exceed four million,
# find the sum of the even-valued terms.
# Walk consecutive Fibonacci terms (x, y), starting from 1 and 2, and keep a
# running total of the even ones for as long as y does not exceed four million.
# NOTE(review): `sum` shadows the builtin; the name is kept because the print
# that follows this loop reads it.
x, y = 1, 2
sum = 0
while y <= 4000000:
    if not y % 2:
        sum = sum + y
    x, y = y, x + y

print(sum)``````
In [5]:
``````# python
# sets
# Build a small set of integers and inspect its type.
a = {2, 4, 5, 7, 9}
type(a)

``````
Out[5]:
``set``
In [15]:
``````import pandas
# Display the DataFrame as the cell's output.
# NOTE(review): no cell visible in this export assigns `mydata`; presumably it
# is loaded in a cell that is not shown — confirm it is defined before this
# point so the notebook survives Restart & Run All.
mydata
``````
Out[15]:
In [16]:
``````import pandas as pd

# Number of rows in the DataFrame.
row_count = len(mydata)
print(row_count)

``````
```8 ```
In [17]:
``````# check dimension of shape
mydata.shape

``````
Out[17]:
``(8, 6)``
In [33]:
``print(mydata.columns)``
```Index(['policyID', 'statecode', 'county', 'eq_site_limit', 'hu_site_limit', 'fl_site_limit'], dtype='object') ```
In [ ]:
`` ``
In [19]:
``mydata.head()``
Out[19]:
In [18]:
``print(mydata.describe())``
``` policyID eq_site_limit hu_site_limit fl_site_limit count 8.000000 8.000000e+00 8.000000e+00 8.000000e+00 mean 410711.875000 2.925701e+05 2.806175e+06 2.925701e+05 std 318050.385373 4.562503e+05 6.659353e+06 4.562503e+05 min 119736.000000 0.000000e+00 7.952076e+04 0.000000e+00 25% 198303.250000 0.000000e+00 2.383922e+05 0.000000e+00 50% 278615.500000 9.536220e+04 4.137300e+05 9.536220e+04 75% 532389.250000 3.711150e+05 7.168708e+05 3.711150e+05 max 995932.000000 1.322376e+06 1.926000e+07 1.322376e+06 ```
In [57]:
``````#sorting the data
# Order the rows from the largest to the smallest eq_site_limit.
# sort_values returns a new object, so mydata itself keeps its original order.
test_data = mydata.sort_values('eq_site_limit', ascending=False)

``````
Out[57]:
In [55]:
``````
# Group the rows by state code and show, for every group, the first non-null
# value of each remaining column.
kk = mydata.groupby(by=['statecode'])
kk.first()
``````
Out[55]:
In [63]:
``test_data['statecode'][1:3]``
Out[63]:
``````0    FL
7    FL
Name: statecode, dtype: object``````
In [60]:
``````# counting the statecode
test_data['statecode'][test_data["statecode"]=='FL'].count()``````
Out[60]:
``8``
In [65]:
``test_data['policyID']``
Out[65]:
``````1    448094
0    119736
7    223488
2    206893
3    333743
4    172534
5    785275
6    995932
Name: policyID, dtype: int64``````
In [66]:
``mydata.info()``
```<class 'pandas.core.frame.DataFrame'> RangeIndex: 8 entries, 0 to 7 Data columns (total 6 columns): policyID 8 non-null int64 statecode 8 non-null object county 8 non-null object eq_site_limit 8 non-null float64 hu_site_limit 8 non-null float64 fl_site_limit 8 non-null float64 dtypes: float64(3), int64(1), object(2) memory usage: 464.0+ bytes ```
In [84]:
``````import pandas as pd

``````
In [85]:
``ABC_consumption_data``
Out[85]:
In [72]:
``TRAI_data``
Out[72]:
In [74]:
``````Merged_data = pd.merge(ABC_consumption_data, TRAI_data, on= 'network', how='left')

print(len(Merged_data))``````
```388 ```
In [86]:
``````# checking the no of records

``````
```20 ```
In [90]:
``````ABC_consumption_data.shape
print(len(ABC_consumption_data))``````
```388 ```
In [96]:
``````merged_data_stat = Merged_data.describe()
print(merged_data_stat)
``````
``` duration Total consumption count 388.000000 346.000000 mean 353.909794 425144.508671 std 2357.715224 169151.700841 min 1.000000 200000.000000 25% 14.000000 200000.000000 50% 69.000000 500000.000000 75% 245.250000 600000.000000 max 45000.000000 600000.000000 ```
In [95]:
``print(Merged_data['network'].value_counts())``
```Three 128 Tesco 71 Vodafone 66 Meteor 54 landline 42 Airtel 27 Name: network, dtype: int64 ```
In [97]:
``Merged_data.head()``
Out[97]:
``pd.Dataframe(Merged_data['duration']).describe(percentiles=(1,0.99,0.9,0.75,0.5,0.3,0.1,0.01)``
In [100]:
``````duration_col_df = pd.DataFrame(Merged_data['duration'])
duration_col_df
``````
Out[100]:
In [19]:
``````duration_mean = int(duration_col_df.mean())
IQR_duration_P75 = duration_col_df.quantile(q=0.75)
IQR_duration_P25 = duration_col_df.quantile(q=0.25)
IQR_duration = IQR_duration_P75 - IQR_duration_P25

IQR_LL = int(IQR_duration_P25 - 10*IQR_duration)
IQR_UL = int(IQR_duration_P75 + 10*IQR_duration)

``````
```--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-19-1d91939ae41f> in <module> ----> 1 duration_mean = int(duration_col_df.mean()) 2 IQR_duration_P75 = duration_col_df.quantile(q=0.75) 3 IQR_duration_P25 = duration_col_df.quantile(q=0.25) 4 IQR_duration = IQR_duration_P75 - IQR_duration_P25 5 NameError: name 'duration_col_df' is not defined```
In [7]:
``list(range(0,4))``
Out[7]:
``[0, 1, 2, 3]``
`` ``