Contents

% ECE302 -- Project 4
% Steven Lee & Jonathan Lam
% Reset the command window, workspace, and figures; render all plot
% text (titles/labels) with the LaTeX interpreter.
clc; clear; close all;
set(0, 'defaultTextInterpreter', 'latex');

% ML estimate for the Rayleigh parameter alpha:
%   alpha_hat = sqrt( (1/(2N)) * sum(x_i^2) )
% mean() operates column-wise, so passing an N-by-M matrix (one trial
% per column) yields a 1-by-M row of per-trial estimates.
est_fn_ray = @(samples) sqrt(mean(samples.^2)/2);

Q1

% Q1: empirically measure bias, variance, and MSE of the ML estimators
% for the exponential (mu) and Rayleigh (alpha) parameters, as a
% function of sample size N, for several true parameter values.

% values to loop over
Ns = logspace(1, 5, 5);     % samples per trial: 10, 100, ..., 100000
params = 1:4;               % true parameter (mu / alpha)
M = 100;                    % independent trials per (N, param) pair
                            % (was a one-iteration `for M=100` loop)

% one plot for exponential, one for rayleigh
% (one row of tiles per parameter value; columns = bias, variance, MSE)
figure('Position', [0 0 750 1000]);
t1 = tiledlayout(length(params), 3);
sgtitle("Exponential R.V.");

figure('Position', [0 0 750 1000]);
t2 = tiledlayout(length(params), 3);
sgtitle("Rayleigh R.V.");

for param=params
    % one row per sample size; columns: [bias variance mse]
    exp_res = zeros(length(Ns), 3);
    ray_res = zeros(length(Ns), 3);

    for Ni=1:length(Ns)
        N = Ns(Ni);

        % exponential: sample mean is the MLE of mu
        [exp_bias, exp_var, exp_mse] = ...
            run_experiment(N, M, param, @exprnd, @mean);
        % rayleigh: est_fn_ray computes the MLE of alpha
        [ray_bias, ray_var, ray_mse] = ...
            run_experiment(N, M, param, @raylrnd, est_fn_ray);

        exp_res(Ni, :) = [exp_bias exp_var exp_mse];
        ray_res(Ni, :) = [ray_bias ray_var ray_mse];

        fprintf("N=%d param=%d: %f %f %f; %f %f %f\n", ...
            N, param, ...
            exp_bias, exp_var, exp_mse, ...
            ray_bias, ray_var, ray_mse);
    end

    % for exponential
    nexttile(t1);
    semilogx(Ns, exp_res(:,1));
    title(sprintf('Bias ($$\\mu=%d$$)', param));
    xlabel("Samples");
    ylabel("$$E[\hat\mu-\mu]$$");
    grid on;

    nexttile(t1);
    loglog(Ns, exp_res(:,2));
    title('Variance');
    xlabel("Samples");
    ylabel("$$E[(\hat\mu-\bar{\hat\mu})^2]$$");
    grid on;

    nexttile(t1);
    loglog(Ns, exp_res(:,3));
    title('MSE');
    xlabel("Samples");
    ylabel("$$E[(\hat\mu-\mu)^2]$$");
    grid on;

    % for rayleigh
    nexttile(t2);
    semilogx(Ns, ray_res(:,1));
    title(sprintf('Bias ($$\\alpha=%d$$)', param));
    xlabel("Samples");
    ylabel("$$E[\hat\alpha-\alpha]$$");
    grid on;

    nexttile(t2);
    loglog(Ns, ray_res(:,2));
    title('Variance');
    xlabel("Samples");
    ylabel("$$E[(\hat\alpha-\bar{\hat\alpha})^2]$$");
    grid on;

    nexttile(t2);
    loglog(Ns, ray_res(:,3));
    title('MSE');
    xlabel("Samples");
    ylabel("$$E[(\hat\alpha-\alpha)^2]$$");
    grid on;
end

% Explanation of figures:
% - The first figure is for the exponential R.V., the other for Rayleigh.
% - Each row of subplots corresponds to one value of the parameter
%   (mu for the exponential, alpha for the Rayleigh); the three columns
%   show bias, variance, and MSE.
% - For each parameter value, M=100 trials were run at each sample size
%   (N={10,100,1000,10000,100000}), and the bias, variance, and MSE of
%   the estimator were computed across those trials.
% - Each plot shows N (number of samples) on the x-axis vs. the specified
%   metric.

Q2

% Load the mystery sample set; transpose so `data` is a column vector.
load('data');
data = data.';      % need a column vector

% ML estimates of the parameter under each candidate model
mu_est = mean(data);            % exponential: sample mean is the MLE of mu
alpha_est = est_fn_ray(data);   % rayleigh: MLE of alpha

% evaluation grid spanning the observed data (used for plotting below)
x = linspace(min(data), max(data), 1000);

% candidate densities with the estimated parameters plugged in
lambda_est = 1/mu_est;
exp_pdf = @(x) lambda_est * exp(-lambda_est * x);

alpha2_est = alpha_est^2;
ray_pdf = @(x) x/alpha2_est .* exp(-x.^2/(2*alpha2_est));

% compare the models by total log-likelihood of the observed data
% (equivalent to comparing products of likelihoods, but numerically safer)
exp_likelihood = sum(log(exp_pdf(data)));
ray_likelihood = sum(log(ray_pdf(data)));

% rayleigh has higher likelihood, so it is the more likely distribution
fprintf("Log-likelihood of exponential distribution: %f\n" ...
    + "Log-likelihood of rayleigh distribution: %f\n", ...
    exp_likelihood, ray_likelihood);
Log-likelihood of exponential distribution: 1053.462453
Log-likelihood of rayleigh distribution: 1365.516070

show histogram to visually see which distribution fits better

% Overlay both fitted densities on the empirical PDF of the data so the
% better-fitting model is visible by eye.
figure();
histogram(data, 'Normalization', 'pdf');
hold on;
plot(x, exp_pdf(x));
plot(x, ray_pdf(x));
hold off;
legend("Sample data (normalized to PDF)", ...
    sprintf("Exponential PDF (mu=%f)", mu_est), ...
    sprintf("Rayleigh PDF (alpha=%f)", alpha_est));
title("Data vs. Exponential and Rayleigh ML-Estimated Distributions");
xlabel("Values");
ylabel("PDF");

% we see that the histogram closely matches the Rayleigh distribution,
% so it most likely is drawn from this distribution

helper function to run experiment

% Run M independent trials of estimating a distribution parameter from
% N samples each, and report the estimator's empirical quality.
%
% Inputs:
%   N      -- number of samples per trial
%   M      -- number of trials
%   param  -- true value of the distribution parameter
%   randfn -- randfn(param, N, M) draws an N-by-M matrix of samples
%   estfn  -- estfn(samples) maps that matrix to a 1-by-M row of
%             per-trial parameter estimates (e.g. column-wise mean)
%
% Outputs:
%   bias     -- mean estimate minus the true parameter
%   variance -- empirical variance of the estimates across trials
%   mse      -- bias^2 + variance (bias-variance decomposition)
function [bias, variance, mse] = run_experiment(N, M, param, randfn, estfn)
    % each column of `trials` is one experiment of N draws
    trials = randfn(param, N, M);

    % one ML estimate per trial (estfn acts column-wise)
    estimates = estfn(trials);

    bias = mean(estimates) - param;
    variance = var(estimates);
    mse = bias^2 + variance;
end
N=10 param=1: 0.002574 0.078209 0.078215; -0.009523 0.028445 0.028536
N=100 param=1: 0.007988 0.011183 0.011247; -0.005721 0.002533 0.002566
N=1000 param=1: 0.000378 0.001286 0.001286; -0.001020 0.000293 0.000294
N=10000 param=1: -0.000920 0.000098 0.000099; -0.000426 0.000024 0.000025
N=100000 param=1: 0.000247 0.000008 0.000008; 0.000111 0.000002 0.000002
N=10 param=2: -0.040950 0.283109 0.284785; 0.011830 0.122604 0.122744
N=100 param=2: -0.007575 0.048410 0.048468; -0.024112 0.009429 0.010010
N=1000 param=2: -0.005067 0.004940 0.004965; 0.003536 0.000870 0.000883
N=10000 param=2: -0.000388 0.000466 0.000466; 0.000211 0.000100 0.000100
N=100000 param=2: 0.000116 0.000036 0.000036; -0.000221 0.000014 0.000014
N=10 param=3: -0.043354 0.686853 0.688733; -0.028280 0.211181 0.211981
N=100 param=3: -0.104062 0.082007 0.092836; 0.005258 0.020529 0.020557
N=1000 param=3: -0.013672 0.009392 0.009579; -0.005131 0.002462 0.002488
N=10000 param=3: -0.005534 0.000959 0.000990; 0.000608 0.000229 0.000229
N=100000 param=3: -0.000150 0.000119 0.000119; -0.000005 0.000019 0.000019
N=10 param=4: 0.127984 1.551503 1.567883; -0.033811 0.389189 0.390333
N=100 param=4: 0.019660 0.182156 0.182542; -0.060038 0.042804 0.046409
N=1000 param=4: 0.011553 0.015798 0.015931; -0.005465 0.003734 0.003764
N=10000 param=4: 0.006851 0.001716 0.001763; 0.001879 0.000401 0.000405
N=100000 param=4: 0.001641 0.000162 0.000164; -0.000130 0.000040 0.000040