This time, we implemented the Metropolis algorithm, a classic example of Markov chain Monte Carlo (MCMC). It is often used when you want to sample not only from well-known probability distributions such as the Gaussian or the uniform distribution, but also from distributions with more complicated shapes.
Consider sampling from a probability distribution $p(x) = \frac{1}{Z_p}\tilde{p}(x)$. The normalization constant $Z_p = \int \tilde{p}(x)\,{\rm d}x$ does not need to be known. When computing a distribution with Bayes' theorem, the normalization constant is often unknown.
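For example, when computing a posterior with Bayes' theorem,

$$p(\theta \mid \mathcal{D}) = \frac{p(\mathcal{D} \mid \theta)\,p(\theta)}{p(\mathcal{D})},$$

the numerator $\tilde{p}(\theta) = p(\mathcal{D} \mid \theta)\,p(\theta)$ is easy to evaluate point by point, while the evidence $Z_p = p(\mathcal{D}) = \int p(\mathcal{D} \mid \theta)\,p(\theta)\,{\rm d}\theta$ is often intractable.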
We cannot sample from $p(x)$ directly because only the unnormalized function $\tilde{p}(\cdot)$ is known. Therefore, we prepare another probability distribution that can be sampled directly (for example, a Gaussian), called the proposal distribution. In the Metropolis algorithm, the proposal distribution must be symmetric, i.e. $q(x' \mid x) = q(x \mid x')$; a Gaussian centered on the current state satisfies this.
Flow of the Metropolis method
1. Set an initial state $x^{(0)}$ (here, the zero vector).
2. Draw a candidate $x^\ast$ from the proposal distribution centered on the current state $x^{(\tau)}$.
3. Accept the candidate with the acceptance probability shown below (PRML eq. (11.33)); if accepted, set $x^{(\tau+1)} = x^\ast$, otherwise keep $x^{(\tau+1)} = x^{(\tau)}$.
4. Repeat from step 2 until enough samples have been collected.
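For a symmetric proposal, the acceptance probability used in step 3 is PRML equation (11.33):

$$A(x^\ast, x^{(\tau)}) = \min\left(1, \frac{\tilde{p}(x^\ast)}{\tilde{p}(x^{(\tau)})}\right)$$

Note that it depends only on the ratio of the unnormalized function values, so $Z_p$ cancels out; this is why the normalization constant is never needed.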
The sample sequence obtained by this procedure is strongly correlated between neighboring samples. Since we usually want independent samples, keeping only every $M$-th sample of the sequence (thinning) weakens the correlation.
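As a minimal sketch of thinning, separate from the sampler implemented below (the random walk here is just dummy correlated data):

import numpy as np

# dummy correlated sequence: a Gaussian random walk
chain = np.cumsum(np.random.normal(size=1000))
# keep only every M-th state to weaken the correlation
M = 10
thinned = chain[::M]
print(len(thinned))  # 100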
We use matplotlib and numpy.
import matplotlib.pyplot as plt
import numpy as np
class Metropolis(object):
    def __init__(self, func, ndim, proposal_std=1., sample_rate=1):
        # unnormalized function
        self.func = func
        # dimensionality of the data
        self.ndim = ndim
        # standard deviation of the (Gaussian) proposal distribution
        self.proposal_std = proposal_std
        # keep only every sample_rate-th sample (thinning)
        self.sample_rate = sample_rate
    # sampling
    def __call__(self, sample_size):
        # set the initial state
        x = np.zeros(self.ndim)
        # list to hold the samples
        samples = []
        for i in range(sample_size * self.sample_rate):
            # draw a candidate from the proposal distribution
            x_new = np.random.normal(scale=self.proposal_std, size=self.ndim)
            x_new += x
            # acceptance probability of the candidate, PRML eq. (11.33)
            accept_prob = self.func(x_new) / self.func(x)
            if accept_prob > np.random.uniform():
                x = x_new
            # record the current state (every sample_rate-th iteration)
            if i % self.sample_rate == 0:
                samples.append(x)
        return np.array(samples)
def main():
    # unnormalized function
    def func(x):
        return np.exp(-0.5 * np.sum(x ** 2, axis=-1) / 5.)
    # first, try a one-dimensional space
    print("one dimensional")
    # proposal distribution with standard deviation 2, keeping only every 10th sample
    sampler = Metropolis(func, ndim=1, proposal_std=2., sample_rate=10)
    # draw 100 samples with the Metropolis method
    samples = sampler(sample_size=100)
    # check the sample mean and variance
    print("mean", np.mean(samples))
    print("var", np.var(samples, ddof=1))
    # plot the sampling results
    x = np.linspace(-10, 10, 100)[:, None]
    y = func(x) / np.sqrt(2 * np.pi * 5.)
    plt.plot(x, y, label="probability density function")
    plt.hist(samples, density=True, alpha=0.5, label="normalized sample histogram")
    plt.scatter(samples, np.random.normal(scale=0.001, size=len(samples)), label="samples")
    plt.xlim(-10, 10)
    plt.show()
    # next, try a two-dimensional space
    print("\ntwo dimensional")
    sampler = Metropolis(func, 2, proposal_std=2., sample_rate=10)
    samples = sampler(sample_size=100)
    print "mean\n", np.mean(samples, axis=0)
    print "covariance\n", np.cov(samples, rowvar=False)
    x, y = np.meshgrid(
        np.linspace(-10, 10, 100), np.linspace(-10, 10, 100))
    z = func(np.array([x, y]).reshape(2, -1).T).reshape(100, 100)
    plt.contour(x, y, z)
    plt.scatter(samples[:, 0], samples[:, 1])
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.show()
mcmc.py
import matplotlib.pyplot as plt
import numpy as np
class Metropolis(object):
    def __init__(self, func, ndim, proposal_std=1., sample_rate=1):
        self.func = func
        self.ndim = ndim
        self.proposal_std = proposal_std
        self.sample_rate = sample_rate
    def __call__(self, sample_size):
        x = np.zeros(self.ndim)
        samples = []
        for i in range(sample_size * self.sample_rate):
            x_new = np.random.normal(scale=self.proposal_std, size=self.ndim)
            x_new += x
            accept_prob = self.func(x_new) / self.func(x)
            if accept_prob > np.random.uniform():
                x = x_new
            if i % self.sample_rate == 0:
                samples.append(x)
        assert len(samples) == sample_size
        return np.array(samples)
def main():
    def func(x):
        return np.exp(-0.5 * np.sum(x ** 2, axis=-1) / 5.)
    print "one dimensional"
    sampler = Metropolis(func, ndim=1, proposal_std=2., sample_rate=10)
    samples = sampler(sample_size=100)
    print "mean", np.mean(samples)
    print "var", np.var(samples, ddof=1)
    x = np.linspace(-10, 10, 100)[:, None]
    y = func(x) / np.sqrt(2 * np.pi * 5.)
    plt.plot(x, y, label="probability density function")
    plt.hist(samples, density=True, alpha=0.5, label="normalized sample histogram")
    plt.scatter(samples, np.random.normal(scale=0.001, size=len(samples)), label="samples")
    plt.xlim(-10, 10)
    plt.show()
    print "\ntwo dimensional"
    sampler = Metropolis(func, 2, proposal_std=2., sample_rate=10)
    samples = sampler(sample_size=100)
    print "mean\n", np.mean(samples, axis=0)
    print "covariance\n", np.cov(samples, rowvar=False)
    x, y = np.meshgrid(
        np.linspace(-10, 10, 100), np.linspace(-10, 10, 100))
    z = func(np.array([x, y]).reshape(2, -1).T).reshape(100, 100)
    plt.contour(x, y, z)
    plt.scatter(samples[:, 0], samples[:, 1])
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.show()
if __name__ == '__main__':
    main()
The figures below show the sampling results in one-dimensional and two-dimensional space, respectively.
The contour lines show the probability density function.
Terminal output
terminal
one dimensional
mean 0.427558835137
var 5.48086205252
two dimensional
mean
[-0.04893427 -0.04494551]
covariance
[[ 5.02950816 -0.02217824]
 [-0.02217824  5.43658538]]
[Finished in 1.8s]
In both cases, the sample mean and variance are close to the population mean (0) and variance (5 in each dimension), respectively.
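Since $\tilde{p}(x) = \exp(-x^2 / (2 \cdot 5))$ is an unnormalized zero-mean Gaussian with variance 5, the results can be sanity-checked against direct numpy sampling (a quick check added here, not part of the original script):

import numpy as np

# 100 direct samples from N(0, 5) for comparison with the Metropolis results
direct = np.random.normal(scale=np.sqrt(5.), size=100)
print("mean", direct.mean())
print("var", direct.var(ddof=1))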
There are other MCMC methods such as Hamiltonian Monte Carlo; I will implement them if I get the chance.
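Finally, since the sampler only needs the unnormalized function, the same class also handles the "more complicated shapes" mentioned at the beginning. A hypothetical example (not in the original post) with a bimodal target:

# unnormalized bimodal density: Gaussian bumps at x = -3 and x = 3
def bimodal(x):
    return np.exp(-0.5 * np.sum((x - 3.) ** 2, axis=-1)) \
        + np.exp(-0.5 * np.sum((x + 3.) ** 2, axis=-1))

# a larger proposal_std helps the chain hop between the two modes
sampler = Metropolis(bimodal, ndim=1, proposal_std=5., sample_rate=10)
samples = sampler(sample_size=1000)

A histogram of `samples` should then show both modes.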