Examples with probability distributions

In [31]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from scipy import stats # has lots of distribution functions in it
import numpy.random as random  # contains uniform distribution random() 
# and randrange() for integers
from scipy import stats # has lots of distribution functions in it
In [32]:
yvec = np.zeros(0)  #empty array to hold randomly generated x values
# generate 1000 randomly generated x values and stick them in xvec
for i in range(2000):
    y = random.random()  # generate a random number y in [0,1), uniformly distributed
    yvec = np.append(yvec,y)  # append it to an array
    
In [33]:
plt.plot(yvec,'b.')
Out[33]:
[<matplotlib.lines.Line2D at 0x7fd117ba8190>]
In [41]:
n,bin_edges,patches = plt.hist(yvec,ec='black')  # plot a histogram
In [42]:
bin_edges   # contains right edge also, left edge is not exactly 0, right edge not exactly 1
Out[42]:
array([2.66237746e-04, 1.00226702e-01, 2.00187167e-01, 3.00147632e-01,
       4.00108097e-01, 5.00068561e-01, 6.00029026e-01, 6.99989491e-01,
       7.99949955e-01, 8.99910420e-01, 9.99870885e-01])
In [46]:
n,bin_edges,patches = plt.hist(yvec,bins=10,\
                               density=True,ec='black')  
#normalize, 10 bins
In [47]:
len(bin_edges)
Out[47]:
11
In [48]:
sum(n)    # this was returned by plt.hist
Out[48]:
10.003955092112134
In [49]:
# evenly spaced bins
bin_edges[1]-bin_edges[0]  
Out[49]:
0.09996046471544787
In [50]:
sum(n)*(bin_edges[1]-bin_edges[0])  # should give 1 as is normalized
Out[50]:
1.0
In [69]:
#We  can set the bin edges to what we want exactly 
mybins = np.linspace(0,1,11)
mybins
Out[69]:
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])
In [70]:
n,bin_edges,patches = plt.hist(yvec,bins=mybins,\
                               density=True,ec='black')  
In [71]:
bin_edges
Out[71]:
array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])
In [51]:
# integers
from random import randrange,seed
for i in range(10):
    print(randrange(10))
1
4
0
7
4
2
3
5
9
4
In [52]:
# another set of integers [0,10), not the same as above
for i in range(10):
    print( randrange(10))
6
7
5
7
4
6
0
7
1
9
In [53]:
seed(10001)
for i in range(10):
    print( randrange(10))
5
1
1
6
9
6
1
8
4
2
In [54]:
seed(10001)  # the same set of integers as before, but I comment this out it is not the same
for i in range(10):
    print( randrange(10))
5
1
1
6
9
6
1
8
4
2

stats package

https://docs.scipy.org/doc/scipy/reference/stats.html

Has many handy distributions and routines for using them

In [72]:
# From a uniform distribution we generate a 
# vector of random vectors all at once with
yvec2 = stats.uniform.rvs(size=2000)  #rvs = random variable sequence
plt.plot(yvec2,'g.')
Out[72]:
[<matplotlib.lines.Line2D at 0x7fd1171e68b0>]
In [73]:
# stats package, normal distn
r = stats.norm.rvs(size=200)# random variable sequence of size 200, should be fast
print(np.mean(r),np.std(r)) # as we expect?
0.06936834245598607 0.9535779478168698
In [75]:
plt.plot(r,'r.')  # the points cluster around y=0
Out[75]:
[<matplotlib.lines.Line2D at 0x7fd117ac36a0>]
In [58]:
x = stats.norm.rvs(size=10000)  # normal distribution
x_shift = stats.norm.rvs(loc=2,scale=0.2,size=10000)  # normal distribution shifted, scaled
#rv = random variable
In [61]:
j=plt.hist(x,bins=100,density=True,  alpha=0.5)
j=plt.hist(x_shift,bins=100,density=True, alpha=0.5)
In [62]:
plt.hist(x,bins=50,density=True,  alpha=0.3, color='orange')
plt.hist(x_shift,bins=50,density=True, alpha=0.3,color='blue')
xarr = np.linspace(-4,4,200)
# probability distributions in stats come with probability  density functions

y =stats.norm.pdf(xarr)   # the distribution function!
plt.plot(xarr,y,color='orange')
y_shift =stats.norm.pdf(xarr,loc=2,scale=0.2) 
plt.plot(xarr,y_shift,color='blue')
plt.xlabel('x',fontsize=18)
j=plt.ylabel('p(x)',fontsize=18)
In [63]:
# compute the mean and standard deviation from
# the probability distributions
print(stats.norm.mean())
print(stats.norm.std())
0.0
1.0

Stuff to introduce

Mention $$x_{n+1} = (ax_n + b) ~ \texttt{mod} ~ c$$

Mention Mersenne twister https://en.wikipedia.org/wiki/Mersenne_Twister

Show ps6 and discuss inverse method for sampling random variables from a desired PDF

What is a Markov chain? https://en.wikipedia.org/wiki/Markov_process

Show that sum of two random variables has distn that is a convolution.

Show that sum of two random variables has mean sum of means and variance sum of variances.

Mention central limit theorem https://en.wikipedia.org/wiki/Central_limit_theorem

Discuss diffusion

In [ ]: