Learn practical skills, build real-world projects, and advance your career
##P460 Machine Learning.

#Akshay Priyadarshi's Code to show an example of Confidence Interval

#Some of the values can be changed to check how the confidence interval holds in different number ranges.

#importing necessary libraries
import numpy as np
import random
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# Total population size.
population = 100000
# Probability with which each generated population member is a 1.
probability = 0.54

# Number 'n' of values randomly selected (with replacement) from the
# population for every sample. It is defined here, before the band around p
# is computed, because the standard error of a sample proportion depends on it.
n = 1000

# Generate the population of 0's and 1's. random.random() is uniform on
# [0, 1), so each entry is 1 with probability exactly 'probability'
# (no rounding of the threshold to two decimals).
Total = np.array(
    [1.0 if random.random() < probability else 0.0 for _ in range(population)]
)
count = int(Total.sum())

# Printing the actual probability of 1 in the population.
p = count / population
print('The actual probability of 1 in the genrated population of',population,'numbers, is:',p)

# Standard error of the proportion of 1's in a sample of size n drawn from
# this population: sqrt(p*(1-p)/n). 'low' and 'high' bound the band of
# p +/- 2 standard errors that the sampling loop compares each sample
# proportion against.
sd = (p * (1 - p) / n) ** 0.5
low = p - (2 * sd)
high = p + (2 * sd)


#Choosing a sample
# Counters filled in by the loop below:
#   c - number of samples whose proportion p1 lies strictly between the
#       'low' and 'high' bounds computed earlier around the population p.
#   d - number of samples whose own interval p1 +/- 2*sd1 contains p.
c = 0
d = 0

# Number of times the sampling is repeated.
times = 1000
# y1 is not needed by the loop; it stays defined for any later code that reads it.
y1 = times / 5
sum1 = 0
# p1[i] holds the proportion of 1's observed in the i-th sample.
p1 = np.zeros(times)
fig, ax = plt.subplots(figsize=(10, 5))
for i in range(times):
    # Draw n values from the population with replacement.
    sample = random.choices(Total, k=n)
    p1[i] = sample.count(1) / n
    # Standard error estimated from this sample alone.
    sd1 = (p1[i] * (1 - p1[i]) / n) ** 0.5
    low1 = p1[i] - (2 * sd1)
    high1 = p1[i] + (2 * sd1)
    if low < p1[i] < high:
        c = c + 1
    if low1 < p < high1:
        d = d + 1
    sum1 = sum1 + p1[i]
    # axvline takes ymin/ymax as fractions of the axes height (0 = bottom,
    # 1 = top), so a full-height line is ymin=0, ymax=1. The very low alpha
    # makes overlapping limits accumulate into darker regions.
    ax.axvline(x=low1, ymin=0, ymax=1, alpha=0.01, color='green')
    ax.axvline(x=high1, ymin=0, ymax=1, alpha=0.01, color='black')

# Optional report for counter c, left disabled as in the original script.
#print(c*100/times,'% of the times,','p1 lies within 2 standard deviations of p.')
print(d*100/times,'% of the times,','p lies within 2 standard deviations of p1.')
print('Here p1 is the probablity of occurance of 1 in a random sample of',n,'numbers out of the original list of numbers.')
print('The sampling was performed',times,'times.')

# Histogram of the sample proportions collected in p1 (raw counts per bin).
ax.hist(p1, bins='auto', density=False, label='Occurances')
ax.grid(alpha=0.1)
ax.set_xlabel('Probability of 1 in the sample -->', fontsize=12)
ax.set_ylabel('Number of Occurances -->', fontsize=12)
ax.set_title('Histogram of probabilities in generated samples with replacement', fontsize=14)

# Full-height marker at the population proportion. axvline's ymin/ymax are
# axes fractions and default to 0 and 1, so no explicit height is needed.
ax.axvline(x=p, color='red', label='Probability in original population')

# Single legend built from proxy patches, one per colour used on the plot.
blue = mpatches.Patch(color='blue', label='Occurances')
red = mpatches.Patch(color='red', label='Probability in original population')
lower = mpatches.Patch(color='green', label=r'$-2\sigma$')
higher = mpatches.Patch(color='black', label=r'$+2\sigma$')
ax.legend(handles=[red, blue, lower, higher])

# Caption below the plot. It is positioned in axes coordinates (0-1 across
# the axes) so it stays centred under the x-axis whatever the data range or
# the histogram height happens to be.
ax.text(
    0.5,
    -0.18,
    r'Black lines are for "p1$+2\sigma$" upper limits, Green lines are for "p1$-2\sigma$" lower limits.',
    transform=ax.transAxes,
    wrap=True,
    ha='center',
    va='top',
    fontsize=13,
)
plt.savefig('ml.png', bbox_inches='tight')
plt.show()
print( 'The darkness of the green and black lines is proportional to frequency of their occurances.')
# Sample output from one run:
#   The actual probability of 1 in the genrated population of 100000 numbers, is: 0.53705
#   97.2 % of the times, p lies within 2 standard deviations of p1.
#   Here p1 is the probablity of occurance of 1 in a random sample of 1000 numbers out of the original list of numbers.
#   The sampling was performed 1000 times.
#   [Notebook Image]
#   The darkness of the green and black lines is proportional to frequency of their occurances.