import numpy as np
import math
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import beta
from scipy.stats import binom
def clopper_pearson(k,n,alpha=0.32):
"""
http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
alpha confidence intervals for a binomial distribution of k expected successes on n trials
Clopper Pearson intervals are a conservative estimate.
Can also do this through statsmodels.stats.proportion.proportion_confint
"""
lo = beta.ppf(alpha/2, k, n-k+1)
hi = beta.ppf(1 - alpha/2, k+1, n-k)
return lo, hi
# Binomial trials
p = 0.98
N = 50
mu = N*p # no. of expected successes
q = 1-p
s = math.sqrt(N*p*q)
# Pick Npicks according to binomial and plot the results in +/- 4sigma
# Superimpose a Gaussian
np.random.seed(12345)
Npicks = 1000
k = np.random.binomial(N, p, Npicks)
bmin = max(0., int(mu-4*s))
bmax = min(N, int(mu+4*s))
bins = np.linspace(bmin, bmax+1, bmax-bmin+2) - 0.5
x = np.linspace(bmin,bmax,1000)+0.5
ax = plt.subplot(111)
ax.hist(k, bins, histtype='step')
ax.plot(x, Npicks*norm.pdf(x, scale=s, loc=mu))
ax.set_xlim((bins[0], bins[-1]))
ax.grid()
ax.set_xlabel("Number of successes")
_ = ax.set_ylim(bottom=0)
# For a given number of successes out of N plot the
# Bayesian posterior (flat prior)
N = 50
Good = 49 # Number of successes
Bad = N - Good
p,q = Good/N, Bad/N
s = math.sqrt(p*q/N)
x = np.linspace(max(0,p-5*s), min(1,p+5*s), 1000)
rv = beta(1+Good, 1+Bad)
ax = plt.subplot(111)
ax.plot(x, rv.pdf(x))
ax.set_xlim((x[0], x[-1]))
ax.grid()
ax.set_xlabel("p")
ax.set_ylabel("Posterior, flat prior")
_ = ax.set_ylim(bottom=0)
# Calculate 1 and 2 sigma interval, compare with Clopper Pearson
for alpha in (0.6827, 0.9545):
up = rv.ppf(1- (1-alpha)/2)
low = rv.ppf((1-alpha)/2)
print("%4.2f%% Bayesian interval: %4.2f -- %4.2f" % (100*alpha,low,up))
low,up = clopper_pearson(Good,N,1-alpha)
print("%4.2f%% Clopper-Pearson interval: %4.2f -- %4.2f" % (100*alpha,low,up))
68.27% Bayesian interval: 0.94 -- 0.99 68.27% Clopper-Pearson interval: 0.94 -- 1.00 95.45% Bayesian interval: 0.89 -- 1.00 95.45% Clopper-Pearson interval: 0.89 -- 1.00