This time, we implemented the Metropolis algorithm, a classic example of Markov chain Monte Carlo (MCMC). It is often used when you want to sample not only from well-known probability distributions such as the Gaussian or the uniform distribution, but also from distributions with more complicated shapes.
Consider sampling from a probability distribution $p(x) = \frac{1}{Z_p}\tilde{p}(x)$. The normalization constant $Z_p = \int \tilde{p}(x)\,{\rm d}x$ does not need to be known. When computing a distribution with Bayes' theorem, the normalization constant is often unknown.
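For example, when computing a posterior with Bayes' theorem,

$$p(\theta \mid \mathcal{D}) = \frac{p(\mathcal{D} \mid \theta)\,p(\theta)}{p(\mathcal{D})},$$

the numerator $\tilde{p}(\theta) = p(\mathcal{D} \mid \theta)\,p(\theta)$ is easy to evaluate point by point, while the evidence $Z_p = p(\mathcal{D}) = \int p(\mathcal{D} \mid \theta)\,p(\theta)\,{\rm d}\theta$ is often intractable.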
We cannot sample from $p(x)$ directly because only the unnormalized function $\tilde{p}(\cdot)$ is known. Therefore, we prepare another probability distribution that can be sampled directly (for example, a Gaussian), called the proposal distribution. In the Metropolis algorithm, the proposal distribution must be symmetric, i.e. $q(x' \mid x) = q(x \mid x')$; a Gaussian centered on the current state satisfies this.
Flow of the Metropolis method
1. Set an initial state $x^{(0)}$ (here, the zero vector).
2. Draw a candidate $x^\ast$ from the proposal distribution centered on the current state $x^{(\tau)}$.
3. Accept the candidate with the acceptance probability shown below (PRML eq. (11.33)); if accepted, set $x^{(\tau+1)} = x^\ast$, otherwise keep $x^{(\tau+1)} = x^{(\tau)}$.
4. Repeat from step 2 until enough samples have been collected.
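For a symmetric proposal, the acceptance probability used in step 3 is PRML equation (11.33):

$$A(x^\ast, x^{(\tau)}) = \min\left(1, \frac{\tilde{p}(x^\ast)}{\tilde{p}(x^{(\tau)})}\right)$$

Note that it depends only on the ratio of the unnormalized function values, so $Z_p$ cancels out; this is why the normalization constant is never needed.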
The sample sequence obtained by this procedure is strongly correlated between neighboring samples. Since we usually want independent samples, keeping only every $M$-th sample of the sequence (thinning) weakens the correlation.
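As a minimal sketch of thinning, separate from the sampler implemented below (the random walk here is just dummy correlated data):

import numpy as np

# dummy correlated sequence: a Gaussian random walk
chain = np.cumsum(np.random.normal(size=1000))
# keep only every M-th state to weaken the correlation
M = 10
thinned = chain[::M]
print(len(thinned))  # 100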
We use matplotlib and numpy.
import matplotlib.pyplot as plt
import numpy as np
class Metropolis(object):
    def __init__(self, func, ndim, proposal_std=1., sample_rate=1):
        # unnormalized function
        self.func = func
        # dimensionality of the data
        self.ndim = ndim
        # standard deviation of the (Gaussian) proposal distribution
        self.proposal_std = proposal_std
        # keep only every sample_rate-th sample (thinning)
        self.sample_rate = sample_rate
    # sampling
    def __call__(self, sample_size):
        # set the initial state
        x = np.zeros(self.ndim)
        # list to hold the samples
        samples = []
        for i in range(sample_size * self.sample_rate):
            # draw a candidate from the proposal distribution
            x_new = np.random.normal(scale=self.proposal_std, size=self.ndim)
            x_new += x
            # acceptance probability of the candidate, PRML eq. (11.33)
            accept_prob = self.func(x_new) / self.func(x)
            if accept_prob > np.random.uniform():
                x = x_new
            # record the current state (every sample_rate-th iteration)
            if i % self.sample_rate == 0:
                samples.append(x)
        return np.array(samples)
def main():
    # unnormalized function
    def func(x):
        return np.exp(-0.5 * np.sum(x ** 2, axis=-1) / 5.)
    # first, try a one-dimensional space
    print("one dimensional")
    # proposal distribution with standard deviation 2, keeping only every 10th sample
    sampler = Metropolis(func, ndim=1, proposal_std=2., sample_rate=10)
    # draw 100 samples with the Metropolis method
    samples = sampler(sample_size=100)
    # check the sample mean and variance
    print("mean", np.mean(samples))
    print("var", np.var(samples, ddof=1))
    # plot the sampling results
    x = np.linspace(-10, 10, 100)[:, None]
    y = func(x) / np.sqrt(2 * np.pi * 5.)
    plt.plot(x, y, label="probability density function")
    plt.hist(samples, density=True, alpha=0.5, label="normalized sample histogram")
    plt.scatter(samples, np.random.normal(scale=0.001, size=len(samples)), label="samples")
    plt.xlim(-10, 10)
    plt.show()
    # next, try a two-dimensional space
    print("\ntwo dimensional")
    sampler = Metropolis(func, 2, proposal_std=2., sample_rate=10)
    samples = sampler(sample_size=100)
    print "mean\n", np.mean(samples, axis=0)
    print "covariance\n", np.cov(samples, rowvar=False)
    x, y = np.meshgrid(
        np.linspace(-10, 10, 100), np.linspace(-10, 10, 100))
    z = func(np.array([x, y]).reshape(2, -1).T).reshape(100, 100)
    plt.contour(x, y, z)
    plt.scatter(samples[:, 0], samples[:, 1])
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.show()
mcmc.py
import matplotlib.pyplot as plt
import numpy as np
class Metropolis(object):
    def __init__(self, func, ndim, proposal_std=1., sample_rate=1):
        self.func = func
        self.ndim = ndim
        self.proposal_std = proposal_std
        self.sample_rate = sample_rate
    def __call__(self, sample_size):
        x = np.zeros(self.ndim)
        samples = []
        for i in range(sample_size * self.sample_rate):
            x_new = np.random.normal(scale=self.proposal_std, size=self.ndim)
            x_new += x
            accept_prob = self.func(x_new) / self.func(x)
            if accept_prob > np.random.uniform():
                x = x_new
            if i % self.sample_rate == 0:
                samples.append(x)
        assert len(samples) == sample_size
        return np.array(samples)
def main():
    def func(x):
        return np.exp(-0.5 * np.sum(x ** 2, axis=-1) / 5.)
    print "one dimensional"
    sampler = Metropolis(func, ndim=1, proposal_std=2., sample_rate=10)
    samples = sampler(sample_size=100)
    print "mean", np.mean(samples)
    print "var", np.var(samples, ddof=1)
    x = np.linspace(-10, 10, 100)[:, None]
    y = func(x) / np.sqrt(2 * np.pi * 5.)
    plt.plot(x, y, label="probability density function")
    plt.hist(samples, density=True, alpha=0.5, label="normalized sample histogram")
    plt.scatter(samples, np.random.normal(scale=0.001, size=len(samples)), label="samples")
    plt.xlim(-10, 10)
    plt.show()
    print "\ntwo dimensional"
    sampler = Metropolis(func, 2, proposal_std=2., sample_rate=10)
    samples = sampler(sample_size=100)
    print "mean\n", np.mean(samples, axis=0)
    print "covariance\n", np.cov(samples, rowvar=False)
    x, y = np.meshgrid(
        np.linspace(-10, 10, 100), np.linspace(-10, 10, 100))
    z = func(np.array([x, y]).reshape(2, -1).T).reshape(100, 100)
    plt.contour(x, y, z)
    plt.scatter(samples[:, 0], samples[:, 1])
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.show()
if __name__ == '__main__':
    main()
The figures below show the sampling results in one-dimensional and two-dimensional space, respectively.
The contour lines show the probability density function.
Terminal output
terminal
one dimensional
mean 0.427558835137
var 5.48086205252
two dimensional
mean
[-0.04893427 -0.04494551]
covariance
[[ 5.02950816 -0.02217824]
 [-0.02217824  5.43658538]]
[Finished in 1.8s]
In both cases, the sample mean and variance are close to the population mean (0) and variance (5 in each dimension), respectively.
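Since $\tilde{p}(x) = \exp(-x^2 / (2 \cdot 5))$ is an unnormalized zero-mean Gaussian with variance 5, the results can be sanity-checked against direct numpy sampling (a quick check added here, not part of the original script):

import numpy as np

# 100 direct samples from N(0, 5) for comparison with the Metropolis results
direct = np.random.normal(scale=np.sqrt(5.), size=100)
print("mean", direct.mean())
print("var", direct.var(ddof=1))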
There are other MCMC methods such as Hamiltonian Monte Carlo; I will implement them if I get the chance.
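Finally, since the sampler only needs the unnormalized function, the same class also handles the "more complicated shapes" mentioned at the beginning. A hypothetical example (not in the original post) with a bimodal target:

# unnormalized bimodal density: Gaussian bumps at x = -3 and x = 3
def bimodal(x):
    return np.exp(-0.5 * np.sum((x - 3.) ** 2, axis=-1)) \
        + np.exp(-0.5 * np.sum((x + 3.) ** 2, axis=-1))

# a larger proposal_std helps the chain hop between the two modes
sampler = Metropolis(bimodal, ndim=1, proposal_std=5., sample_rate=10)
samples = sampler(sample_size=1000)

A histogram of `samples` should then show both modes.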