%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Empirical Financial Econometrics Matlab Class Code part 2
% Updated Spring 2021
% This Matlab code facilitates to ASSN 3-4. 
% Copyright at Chaoyi Chen (BCE & MNB)
% All rights reserved. For use by registered students only.
% Please do not distribute without express written consent.
% Email: chaoyi.chen@uni-corvinus.hu
% Web: www.chenchaoyi.com
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
%{
Folloiwng codes are from previous ASSNs as we need part of their results first
%}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  

%%
clear all % clean workspace
warning('off') % off some over-anxious warning so we feel more relaxed in the life. 

%%
%%%%%%%%%%%%%%%%%%%%%%%%% Data load and transformation %%%%%%%%%%%%%%%%%%%%%%%%%
% Load data
load YMOPD1901 % This is mat. file, which stores data that the MATLAB program uses

year = YMOPD(:,1); 
price = YMOPD(:,2);
lagprice = lagmatrix(price,1);
RiskFree = YMOPD(:,3);
n=size(year,1);

% Define log versions 
p = log(price);
p = p(2:end);
lp = log(lagprice);
lp = lp(2:end);

% Define ratios and returns
y = p - lp; 
riskfree = RiskFree(2:end);

%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%   
%{
Now begin with the new study
%}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 

%%
%%%%%%%%%%%%%%%%%%%%%%%%% Unit root test %%%%%%%%%%%%%%%%%%%%%%%%%
% We now want to test if the return (y) is stationary or not. 
% Keep in mind that the null hypothesis is unit root (unstationary)
% You can specify the maximum lags to use and compare BIC to obtain the
% optimal lags

maxlag = 5;
h = zeros(maxlag,1); % record testing result for each lag; 0 for fail to reject the null and 1 for reject the null
ur_bic =zeros(maxlag,1); % record BIC result for each lag
for i= 0:maxlag
[h(i+1,1),p,s,c,reg]=adftest(y,'model','TS','lags', i);% you can change model variant by chaning the function inputs. see HELP for more information 
ur_bic(i+1)=reg.BIC;
end
[ur_optimal_BIC, ur_opt_lag] = min(ur_bic); % choose optimal lags for unit root test
ur_result = h(ur_opt_lag);

if ur_result == 0
    ur_result = 'Fail to reject';
else
    ur_result = 'Reject';
end

% Print results of augmented DF test
%---------TABLES-----------%
diary results_ADF; 
    disp('---------------------------');
    disp('Under the null of unit root');
    disp('---------------------------');
    disp( ur_result);
    disp('---------------------------')
    disp( 'Optimal lag for ADF model =');
    disp('---------------------------')
    disp(ur_opt_lag);
diary off;

%%%%%%%%%%%%%%%%%%%%%%%%% Home Work %%%%%%%%%%%%%%%%%%%%%%%%%
% Test risk free return at home to see it's stationary or not

%%
%%%%%%%%%%%%%%%%%%%%%%%%% ARMA model %%%%%%%%%%%%%%%%%%%%%%%%%
 
% As we reject the null, we show the evidence that sp500 return is stationary
% Then, we move to select the optimal ARMA model to forecast y.
% Note that if you fail to reject the null, you need to take difference
% then re-test the differenced series...

%In the ASSN 4-6, we will use some of functions in "Econometrcs" toolbox of Matlab (should be installed by default)
 %{
 Alternatively, you may use functions in "Oxford MFE" toolbox developed by Kevin Sheppard from Oxford (you need to setup by yourself as it's not an internal source) 
 Followings are steps to add the MFE toolbox for those of you have an interest:
 1). Browse "https://www.kevinsheppard.com/code/matlab/mfe-toolbox/#code"
 2). Click "Code" and then Click "Oxford MFE Toolbox", download the zip
     file. Then, uncompress it to your local directory
 3). Before use the function, you need to set the path inside the Matlab for the toolbox first!
  3.1 On the Home tab, in the Environment section, click Set Path. The Set Path dialog box appears.
  3.2 Use the Set Path dialog box to modify the search path
  3.3 Click "Add Folder" then choose the uncompressed file, save and close
 Then, you will be able to use it
%}
%%
%%%%%%%%%%%%%%%%%%%%%%%%% model selection %%%%%%%%%%%%%%%%%%%%%%%%%
% Set largest lags for both AR and MA terms and estimate all models. Then
% collect log likelihood
pMax = 1; % specify largest lag for AR term 
qMax = 3; % specify largest lag for MA term 
LogL = zeros(pMax, qMax);
SumPQ = LogL;

for p = 1:pMax
    for q = 1:qMax
        ToEstMdl = arima(p,0,q); % construct ARMA(p,q) model
        [Est, ~, LogL(p,q)] = estimate(ToEstMdl, y, 'Display','off'); % estimate ARMA(p,q) model
        SumPQ(p,q) = p+q;
    end
end

% Now we select the optimal (p,q) using the BIC criterion
logL = reshape(LogL, pMax*qMax,1); % reshape our log-likelihood for each pair of (p,q)
numParams = reshape(SumPQ,pMax*qMax,1)+1; % reshape number of parameters used in each try
bic = aicbic(logL,numParams,size(y,1)); % compute BIC for each pair
BIC = reshape(bic, pMax, qMax); % transform back to pMax by qMax matrix to help us locate the optimal (p,q)
[bestP,bestQ] = find(BIC == min(bic)); % find optimal p and q

% after we find the optimal (p,q), we proceed to construct the optimal model and
% estimate the model
ToEstMdl_opt = arima(bestP,0,bestQ);
[Est_opt, ~, info_opt] = estimate(ToEstMdl_opt, y, 'Display','off');

% Print results of in sample results
%---------TABLES-----------%
diary results_ARMA_IN_SAMPLE; 
    disp('---------------------------');
    disp('In sample results');
    disp('---------------------------');
    disp( Est_opt);
    disp('---------------------------')
diary off;

% residual diagonostic
ARMA_resid = infer(Est_opt,y); % collect in sample residual. 
autocorr(ARMA_resid); % plot autocorrelation figure. 
[lbq_in_sample_opt,p_in_sample_opt,~,~] = lbqtest(ARMA_resid); % conduct Ljung-Box test. Under the null of no autocorrelation. 

if lbq_in_sample_opt == 0
   lbq_in_sample_opt = 'Fail to reject';
else
   lbq_in_sample_opt = 'Reject';
end

% Print results of LBQ test_insample
%---------TABLES-----------%
diary results_LBQ_in; 
    disp('---------------------------');
    disp('Under the null of no autocorrelation:');
    disp('---------------------------');
    disp(lbq_in_sample_opt);
    disp('---------------------------')
diary off;

%%
%%%%%%%%%%%%%%%%%%%%%%%%% in sample analysis %%%%%%%%%%%%%%%%%%%%%%%%%
% As both the autocorrelation figure and LBQ test shows no correlation in
% the residuals, our model is well-specified. The next step is to compare
% with the histrorical mean - our another forecasting method

hm_in_sample = mean(y); % in sample histrocial mean (HM)
hm_resi = y-hm_in_sample; % residual of HM model 

MAE_hm_in_sample = mean(abs(hm_resi)); % compute mean absolute error for HM
MAE_ARMA_in_sample = mean(abs(ARMA_resid)); % compute mean absolute error for ARMA
MSE__hm_in_sample = sqrt(sum(hm_resi.^(2))); % compute mean squared error for HM .^(2) means take square on each element 
MSE__ARMA_in_sample = sqrt(sum(ARMA_resid.^(2))); % compute mean squared error for ARMA

% Print results of in sample model comparison 
%---------TABLES-----------%
diary results_horse_race_in_sample; 
    disp('---------------------------');
    disp('MAE of Historical Mean');
    disp('---------------------------');
    disp(MAE_hm_in_sample);
    disp('---------------------------')
    disp('MAE of Optimal ARMA');
    disp('---------------------------');
    disp(MAE_ARMA_in_sample);
    disp('---------------------------')
    disp('MSE of Historical Mean');
    disp('---------------------------');
    disp(MSE__hm_in_sample);
    disp('---------------------------')
    disp('MSE of Optimal ARMA');
    disp('---------------------------');
    disp(MSE__ARMA_in_sample);
diary off;

%%%%%%%%%%%%%%%%%%%%%%%%% Home Work %%%%%%%%%%%%%%%%%%%%%%%%%
% 1). Plot the in sample ARMA/HM forecast with the real value
% 2). Reproduce the in sample anamysis using the risk free return

%%
%%%%%%%%%%%%%%%%%%%%%%%%% out-of-sample analysis %%%%%%%%%%%%%%%%%%%%%%%%%
% Now, we see the ARMA is better than HM in sample. We are curious how our
% ARMA model peroforms in a out-of-sample (OOS) situation. In the following, we
% aim to evaluate the optimal model's OOS preformance and compare
% it to the OOS HM

% We divide the sample into two part, training and testing
training = 50; % set initial training period for recursive or rolling window
estMethod = 'rollingwindow'; % specify OOS method. Option: 'recursivewindow' or 'rollingwindow'

fitted_y = zeros(size(y,1)-training,1); % store fitted y of ARMA
ris_y_oos = zeros(size(y,1)-training,1); % store residual of ARMA
fitted_y_hm = zeros(size(y,1)-training,1); % store fitted y of HM
ris_y_oos_hm = zeros(size(y,1)-training,1); % store residual of HM

for t=1:size(y,1)-training
    
    % Specify training data
    if strcmpi(estMethod,'rollingwindow')
       Esty = y(t:training+t-1);
    else
       if strcmpi(estMethod,'recursivewindow') 
           Esty = y(1:training+t-1);
       else
           error(message('either rollingwindow or recursivewindow has to be choosen'))
       end
    end
    
    % Specify testing data
    Outy = y(training+t);
    
    % Estimate optimal ARMA model
    [Est_opt_oos, ~, info_opt_oos] = estimate(ToEstMdl_opt, Esty, 'Display','off');
    
    % Forecast next period return from ARMA model
    fitted_y(t) = forecast(Est_opt_oos,1,'Y0',Esty);
    
    % Compute residual of ARMA
    ris_y_oos(t) = Outy-fitted_y(t);
    
    % Forecast from HM model and resdiual
    fitted_y_hm(t) = mean(Esty);
    ris_y_oos_hm(t) = Outy-fitted_y_hm(t);
end    

% Plot the forecast
figure
h1 = plot(y,'Color',[.7,.7,.7]);
hold on
h2 = plot(training+1:size(y,1),fitted_y,'b','LineWidth',2);
h3 = plot(training+1:size(y,1),fitted_y_hm,'r:',...
		'LineWidth',2);
legend([h1 h2 h3],'Observed','ARMA Forecast',...
		'HM Forecast','Location','NorthWest');
title('Out-of-sample Forecasts');
grid on
hold off

% Evaluate the OOS preformance by MAE and MSE
MAE_hm_oos_sample = mean(abs(ris_y_oos_hm)); % compute OOS mean absolute error for HM
MAE_ARMA_oos_sample = mean(abs(ris_y_oos)); % compute OOS mean absolute error for ARMA
MSE_hm_oos_sample = sqrt(sum(ris_y_oos_hm.^(2))); % compute OOS mean squared error for HM 
MSE_ARMA_oos_sample = sqrt(sum(ris_y_oos.^(2))); % compute OOS mean squared error for ARMA 

% Print results of OOS model comparison 
%---------TABLES-----------%
diary results_horse_race_oos; 
    disp('---------------------------');
    disp('Out-of-sample Method');
    disp('---------------------------');
    disp(estMethod);
    disp('---------------------------');
    disp('MAE of Historical Mean');
    disp('---------------------------');
    disp(MAE_hm_oos_sample);
    disp('---------------------------')
    disp('MAE of Optimal ARMA');
    disp('---------------------------');
    disp(MAE_ARMA_oos_sample);
    disp('---------------------------')
    disp('MSE of Historical Mean');
    disp('---------------------------');
    disp(MSE_hm_oos_sample);
    disp('---------------------------')
    disp('MSE of Optimal ARMA');
    disp('---------------------------');
    disp(MSE_ARMA_oos_sample);
diary off;

%%%%%%%%%%%%%%%%%%%%%%%%% Home Work %%%%%%%%%%%%%%%%%%%%%%%%%
%{
 1). Try more OOS with rolling method, is it better than recursive results?
 2). Try more OOS with different training period
%}
%%%%%%%%%%%%%%%%%%%%%%%%% The End %%%%%%%%%%%%%%%%%%%%%%%%%