
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% variance vs bias
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Start from a clean slate: wipe the workspace and close any open figures.
clear('all');
close('all');

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%original example in lecture 1
%numSets=200
%numPoints=50
%dataSet=sin2xpi(numPoints,0,1,0.2);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% alternative for practice
% Load the practice data set. The rest of the script reads column 1 as x
% and column 2 as y.
sinpoints = importdata('sin_mat.dat');
% importdata hands back a struct (numeric part in .data) instead of a plain
% matrix when the file contains header text; unwrap it so that the column
% indexing used below keeps working.
if isstruct(sinpoints)
  sinpoints = sinpoints.data;
end
assert(size(sinpoints,2) >= 2, ...
       'sin_mat.dat must contain at least two columns (x and y).');
dataSet = sinpoints;
numSets = 200;                 % number of bootstrap replicates
numPoints = size(dataSet,1);   % number of observations (rows) in the data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% generate sets by bootstrapping

% Draw numSets bootstrap replicates of the data. One polynomial per degree
% is later fitted to every replicate.
bootSize = 30;  % second argument to bootstrap(); presumably the replicate size -- confirm against bootstrap.m
bootSets = cell(1, numSets);   % named bootSets so the builtin set() is not shadowed
for f = 1:numSets
  bootSets{f} = bootstrap(dataSet, bootSize);
end

% For each polynomial degree, fit a model to every replicate, evaluate it on
% the full data set and decompose the mean squared error into squared bias
% and variance.
degrees    = 0:9;
numDegrees = numel(degrees);
xAll       = dataSet(:,1);
yAll       = dataSet(:,2);
numObs     = size(dataSet,1);

bias      = zeros(1, numDegrees);  % squared bias, averaged over the data points
variance  = zeros(1, numDegrees);  % prediction variance, averaged over the data points
meanSqErr = zeros(1, numDegrees);  % mean squared error, averaged over points and replicates

for d = 1:numDegrees
  % Column k holds the predictions of the model trained on replicate k.
  predicted = zeros(numObs, numSets);
  for setIx = 1:numSets
    data  = bootSets{setIx};
    coefs = polyfit(data(:,1), data(:,2), degrees(d));
    predicted(:,setIx) = polyval(coefs, xAll);
  end

  % Average prediction at every data point across all replicates.
  meanPred = mean(predicted, 2);

  % bsxfun keeps the script usable on releases without implicit expansion.
  residuals  = bsxfun(@minus, predicted, yAll);
  deviations = bsxfun(@minus, predicted, meanPred);

  meanSqErr(d) = mean(residuals(:).^2);
  bias(d)      = mean((meanPred - yAll).^2);
  % Average over the data points with the same 1/N normalisation that is
  % used for bias and error. The N-1 divisor belongs to a sample-variance
  % estimate, not to an average over evaluation points, and with it the
  % identity  error = bias + variance  no longer holds.
  variance(d)  = mean(deviations(:).^2);
end

figure
subplot(1,2,1);
hold on
% Plot against the actual degree (0..9), not the array index (1..10), so
% the curves line up with the x-axis label.
plot(degrees, bias, '-r')
plot(degrees, variance, '-b')
plot(degrees, meanSqErr, '-g')
xlabel('Polynomial degree');
%plot(degrees, variance+bias, '-k')   % bias is already squared, so no .^2 here
% 'NorthWest' is the named equivalent of the obsolete numeric position 2.
legend('Bias', 'Variance', 'Error', 'Location', 'NorthWest');
axis([0,10,0,0.2])

% Right panel: fit one polynomial per degree to the complete data set and
% draw the fitted curves on top of the raw points.
xs = linspace(min(dataSet(:,1)), max(dataSet(:,1)), 50);   % evaluation grid over the observed x range

% NOTE(review): the degrees fitted here are 1..10, while the bias/variance
% loop above uses 0..9 -- confirm whether the mismatch is intentional.
fitDegrees = 1:10;
ys = zeros(numel(fitDegrees), numel(xs));   % one row per fitted curve
for k = 1:numel(fitDegrees)
  coefs   = polyfit(dataSet(:,1), dataSet(:,2), fitDegrees(k));
  ys(k,:) = polyval(coefs, xs);
end

subplot(1,2,2);
hold on
plot(xs, ys);
plot(dataSet(:,1), dataSet(:,2), '.b')
ylabel('y');
xlabel('x');

% Export the whole figure (both panels) as a 200 dpi TIFF.
print -dtiff -r200 figureBiasVariance.tiff
