% IANTA16: Programa MATLAB para "Proyecto IANTA"
%
% Representación temporal y espectral del sonido de anuros
%
% 16: Clasificación con HMM (Hiden Markov Models) y redes neuronales recurrentes
%       Se toman los parámetros principales de varios frames
%       Aunque conceptualmente es distinto de los algoritmos de "sliding windows"
%       se almacena en el mismo fichero resumen
%

function IANTA16

clear all;
clear global all;
clc;

global PATRON;
global carpeta; %Carpeta de trabajo de los sonidos
global ParUtil; %Parámetros utilizados
global NumPar NumFrames;

%-------------- Variables generales
[FicheroPatron,roiPatron,FicheroResumenPatrones]=DefinePatron;
DefineVariablesGenerales;
carpeta='sonidos\Procesados\';
%carpeta='sonidos\kk\';

OrdPar=[6,1,8,10,7,12,5,3,18,4,17,16,14,13,11,15,9,2];

NumPar=5; %Número de parámetros utilizados
FicheroResumen=[carpeta,'ResumenSW-',num2str(NumPar)];
ParUtil=OrdPar(1:NumPar);
%
[matpar,ClasePatronNum,NomPar]=GeneraDatosPatrones;
matpar=matpar(:,ParUtil);
%}

NumFrames=15; %Número de frames utilizados


%{
%-------------- Calcula la clase de sonido por HMM (Hiden Markov Model)
algoritmo=10;
cabecera={'HMM'};
display(cabecera);
display(NumFrames);
clasificador=fitHMM(matpar,ClasePatronNum);
clasificador=load('HMM');
clasificador=clasificador.clasificador;
[ClaseSonidoFichero,ProbClasFichero]=ClasificaCarpeta(algoritmo,clasificador);
EscribeColumnas(FicheroResumen,NumFrames,algoritmo,...
    cabecera,ClaseSonidoFichero,ProbClasFichero);
%}

%{
%-------------- Calcula la clase de sonido por redes neuronales recursivas
algoritmo=11;
cabecera={'Red neur. rec.'};
display(cabecera);
%
if NumFrames==1
    clasificador=feedforwardnet(10);
else
    clasificador = layrecnet(1:NumFrames-1,10);
end
clasificador.trainParam.showWindow=0;
clasificador=train(clasificador,matpar',ClasePatronNum');
%save('RNN','clasificador');
%clasificador = load('RNN');
%clasificador = clasificador.clasificador;

[ClaseSonidoFichero,ProbClasFichero]=ClasificaCarpeta(algoritmo,clasificador);
EscribeColumnas(FicheroResumen,NumFrames,algoritmo,...
    cabecera,ClaseSonidoFichero,ProbClasFichero);
%}

%
%-------------- Gráficos algoritmos
nhoja=NumFrames;
[~,~,hoja]=xlsread(FicheroResumen,nhoja);
tamhoja=size(hoja);
nfil=tamhoja(1);
ncol=tamhoja(2);
MatClas=cell2mat(hoja(2:nfil,2:ncol));
cabecera=hoja(1,:);
algoritmo=11;

%GraficaPerfilExito(MatClas(:,1),MatClas(:,2*algoritmo))
%GraficaProbExito(MatClas(:,1),MatClas(:,2*algoritmo),MatClas(:,2*algoritmo+1))
%GraficaCompAlg(MatClas,cabecera);
%MatrizConfusion(hoja,algoritmo)
%EvaluacionClasificacion(hoja);
%GraficaROC(hoja);
%CohenKappaAnalysis(hoja);
%GraficaCompNumFrame(FicheroResumen);
%GraficaROCNumFrame(FicheroResumen);
GraficaROCNumFrameRNN(FicheroResumen);

kk=1;
%}

%
% Función que obtiene modelo HMM
%
function clasificador=fitHMM(X,Y)
% X: matriz de predictores (observación,parámetro)
% Y: vector de clases (observación)
% clasificador: clasificador; vector (especies)

global PATRON;
global NumPar NumFrames;

nesp=PATRON.nesp;
nobs=length(Y);
NumCod=256;
NumEst=5;

% Obtención de "CodeBook" y conversión de los patrones a este código
[Z,muX,sigmaX]=zscore(X);
[CodeBook,~,Xcod]=kmeanlbg(Z,NumCod);

% Obtención de los ROI de los patrones para cada clase (especie)
nROI=0;
esROI=0;
for k=1:nobs
    esp=Y(k);
    if esp>nesp
        if esROI
            ROI(nROI,2)=k-1;
            esROI=0;
        end
    else
        if ~esROI
            nROI=nROI+1;
            ROI(nROI,1)=k;
            claseROI(nROI)=esp;
            esROI=1;
        end
    end
end

% Obtención de las secuencias de códigos (por extensión de los ROIs) para cada clase
ROI(1,1)=1;
for iROI=1:nROI-1
    ROI(iROI,2)=ROI(iROI,2)+round((ROI(iROI+1,1)-ROI(iROI,2))/2);
    ROI(iROI+1,1)=ROI(iROI,2)+1;
end
ROI(nROI,2)=nobs;

% Obtención de las matrices iniciales de los HMM
EmIni=ones(NumEst,NumCod) / NumCod; %Matriz de emisiones (inicial)
TrIni=zeros(NumEst,NumEst); %Matriz de transiciones (inicial)
for i=1:NumEst
    TrIni(i,i)=0.5;
end
for i=1:NumEst-1
    TrIni(i,i+1)=0.5;
end
TrIni(NumEst,1)=0.5;

% Obtención de los modelos HMM para cada especie
HMM=cell(nesp,1);
for iesp=1:nesp
    vind=find(claseROI==iesp);
    k=0;
    seq=cell(nesp,1);
    for iROI=vind
        i1=ROI(iROI,1);
        i2=ROI(iROI,2);
        k=k+1;
        seq{k}=Xcod(i1:i2)';
    end
    [TrMat,EmMat] = hmmtrain(seq,TrIni,EmIni);
    HMM{iesp}.TrMat=TrMat;
    HMM{iesp}.EmMat=EmMat;
end
clasificador.HMM=HMM;
clasificador.CodeBook=CodeBook;
clasificador.muX=muX;
clasificador.sigmaX=sigmaX;
save('HMM','clasificador');
kk=1;


%
% Función que clasifica mediante HMM
%
function [Z,ProbZ]=HMMPredict(clasificador,X)
% X: matriz de predictores (observación,parámetro)
% clasificador: clasificador; vector (clases)
% Z: vector de predicciones (observación)
% ProbZ: Probabilidad de las predicciones: matriz (observación,especie)

global PATRON;
nesp=PATRON.nesp;
global NumPar NumFrames;

nobs=length(X);
CodeBook=clasificador.CodeBook;
HMM=clasificador.HMM;
muX=clasificador.muX;
sigmaX=clasificador.sigmaX;

Xcod=zeros(nobs,1);
for k=1:nobs
    %d=disteusq(CodeBook,X(k,:));
    xnorm=(X(k,:)-muX)./sigmaX;
    d=disteusq(CodeBook,xnorm);
    [~,Xcod(k)]=min(d);
end

Z=zeros(nobs,1);
ProbZ=zeros(nobs,nesp);
for k=NumFrames:nobs
    k1=k-NumFrames+1;
    k2=k;
    for iesp=1:nesp
        TrMat=HMM{iesp}.TrMat;
        EmMat=HMM{iesp}.EmMat;
        umbral=1e-10;
        TrMat(TrMat<umbral)=umbral ;
        EmMat(EmMat<umbral)=umbral ;
        [~,ProbZ(k,iesp)]=hmmdecode(Xcod(k1:k2)',TrMat,EmMat);
    end
    [~,Z(k)]=max(ProbZ(k,:));
end
kk=1;




%
% Función que genera los datos de los patrones
%
function [X,Y,NomPar]=GeneraDatosPatrones
% X: matriz de predictores (observación,parámetro)
% Y: vector de clases (observación)
% NomPar: nombre de paráemtros (parámetro)

global PATRON;

[FicheroPatron,roiPatron,FicheroResumenPatrones]=DefinePatron;
nesp=PATRON.nesp;
X=[];
ClasePatronTxt=[];
for iesp=1:nesp
    FicheroMuestras=FicheroPatron{iesp};
    nmuest=length(FicheroMuestras);
    for imuest=1:nmuest
        NombreFichero=FicheroMuestras{imuest};
        [~,~,hoja]=xlsread(NombreFichero);
        tamhoja=size(hoja);
        nfil=tamhoja(1);
        ncol=tamhoja(2);
        matparx=cell2mat(hoja(2:nfil,2:ncol-1));
        ClasePatronTxtX=hoja(2:nfil,ncol);
        X=[X;matparx];
        ClasePatronTxt=[ClasePatronTxt;ClasePatronTxtX];
    end %de una muestra
end % de una especie
Y=ClaseTxt2Num(ClasePatronTxt);
npar=(ncol-2)/2;
X=X(:,1:npar);
NomPar=hoja(1,2:npar+1);


%
% Función que clasifica los sonidos de un fichero 
%   (a partir del fichero Excel)
%
function [ClaseSonido,ProbClase]=ClasificaFichero...
    (NombreFichero,algoritmo,clasificador)
% NombreFichero: nombre del fichero a procesar (sin extensión)
% algoritmo: Identificador del algoritmo de clasificación
% clasificador: Identificador del clasificador utilizado
% ClaseSonido: Resultado de la clasificación
% ProbClase: Probabilidad de la clasificación (factor de mérito)

global PATRON;
global ParUtil; %Parámetros utilizados
global NumPar NumFrames;

nesp=PATRON.nesp;
[~,~,hoja]=xlsread(NombreFichero);
tamhoja=size(hoja);
nfil=tamhoja(1);
ncol=tamhoja(2);
matpar=cell2mat(hoja(2:nfil,2:ncol-1));
matpar=matpar(:,ParUtil);
nobservaciones=size(matpar,1);

switch algoritmo
    case 10 % HMM
        [PredSonido,ProbSonido]=HMMPredict(clasificador,matpar);

    case 11 % Red neuronal recurrente
        PredSonido=round(clasificador(matpar')');
        PredSonido=min(PredSonido,nesp+1);
        PredSonido=max(PredSonido,1);
        ProbSonido=zeros(nobservaciones,nesp+1);
        for k=1:nobservaciones-NumFrames+1
            ProbSonido(k,PredSonido(k))=1;
        end
        
end

ContEsp=zeros(1,nesp);
for iesp=1:nesp
    ResComp=eq(PredSonido,iesp*ones(length(PredSonido),1));
    ContEsp(iesp)=sum(ResComp);
end
[~,ClaseSonido]=max(ContEsp);
ProbClase=ContEsp(ClaseSonido)/sum(ContEsp);
kk=1;


%
% Función que clasifica los sonidos de una carpeta 
%   (a partir de los nombres de los ficheros Excel)
%
function [NombreFichero,ClaseSonidoFichero]=ClasificaCarpetaporNombre
% NombreFichero: Nombre de cada fichero excel (vector)
% ClaseSonidoFichero: Clase de sonido de cada fichero excel (vector)

global carpeta; %Carpeta de trabajo de los sonidos

NombreFichero=ObtieneFicherosExcelCarpeta(carpeta);
nfich=length(NombreFichero);
for ifich=1:nfich
    nombre=NombreFichero{ifich};

    for i=length(nombre):-1:1
        car=nombre(i);
        if strcmp(car,'\')
            idSon=nombre(i+1:i+2);
            break;
        end
    end
    
    switch idSon
        case 'A1'
            ClaseSonidoFichero(ifich)=1;
        case 'A2'
            ClaseSonidoFichero(ifich)=1;
        case 'A3'
            ClaseSonidoFichero(ifich)=2;
        case 'B1'
            ClaseSonidoFichero(ifich)=3;
        case 'B2'
            ClaseSonidoFichero(ifich)=3;
    end

end %del fichero


%
% Función que clasifica los sonidos de una carpeta 
%   (a partir de los ficheros Excel)
%
function [ClaseSonidoFichero,ProbClasFichero]=...
    ClasificaCarpeta(algoritmo,clasificador)
% algoritmo: Identificador del algoritmo de clasificación
% clasificador: Identificador del clasificador utilizado
% ClaseSonidoFichero: Clase de sonido predicho de cada fichero excel (vector)
% ProbClasFichero: Probabilidad de la clase de sonido de cada fichero excel (vector)

global carpeta; %Carpeta de trabajo de los sonidos

NombreFichero=ObtieneFicherosExcelCarpeta(carpeta);
nfich=length(NombreFichero);
for ifich=1:nfich
    nombre=NombreFichero{ifich};
    disp(nombre);
    [ClaseSonidoFichero(ifich),ProbClasFichero(ifich)]=...
        ClasificaFichero(nombre,algoritmo,clasificador);
    disp(ClaseSonidoFichero(ifich));
end %del fichero


%
% Función que escribe las columnas del fichero excel correspondientes a un
%   algoritmo de clasificación
%
function EscribeColumnas(FicheroResumen,hoja,algoritmo,...
    cabecera,ClaseSonido,ProbClas)
% FicheroResumen: Nombre del fichero excel en el que se escribe
% hoja: Número de hoja en la que escribir
% algoritmo: Identificador del algoritmo de clasificación
% cabecera: Texo con la cabecera de las columnas
% ClaseSonido: Resultado de la clasificación
% ProbClas: Probabilidad de la clasificación (factor de mérito)

columna=2*algoritmo+1;
LetCol1=char('A'+columna-1);
LetCol2=char('A'+columna);
xlswrite(FicheroResumen,cabecera,hoja,[LetCol1,'1']);
xlswrite(FicheroResumen,ClaseSonido',hoja,[LetCol1,'2']);
xlswrite(FicheroResumen,ProbClas',hoja,[LetCol2,'2']);

%
% Función que traza el gráfico de perfíl de éxito de una técnica de
% clasificación
%
function GraficaPerfilExito(SonidoReal,ClaseSonido)
% ClaseSonido: Clase de sonido predicho de cada fichero excel (vector)
% SonidoReal: Clase de sonido real de cada fichero excel (vector)

global PATRON;

nesp=PATRON.nesp;
nexito=zeros(1,nesp);
nfichesp=zeros(1,nesp);
nfich=length(ClaseSonido);
for ifich=1:nfich
    nfichesp(SonidoReal(ifich))=nfichesp(SonidoReal(ifich))+1;
    if ClaseSonido(ifich)==SonidoReal(ifich)
        nexito(ClaseSonido(ifich))=nexito(ClaseSonido(ifich))+1;
    end
end

% Gráfico del perfil de éxito
theta=linspace(0,2*pi,nesp+1);
tasaexito=zeros(1,nesp+1);
tasaexito(1:nesp)=nexito./nfichesp;
tasaexito(nesp+1)=tasaexito(1);
rectangle('Position',[-1,-1,2,2],'Curvature',[1,1],'FaceColor','w');
axis off;
axis equal;
for tasa=[25,50,75]/100
    rectangle('Position',[-tasa,-tasa,2*tasa,2*tasa],'Curvature',[1,1],...
        'LineStyle',':');
end
for iesp=1:nesp
    th1=theta(iesp);
    xf=cos(th1);
    yf=sin(th1);
    color=PATRON.coloresp(iesp,:);
    line([0,xf],[0,yf],'Color',color);
    
    x1=tasaexito(iesp)*cos(th1);
    y1=tasaexito(iesp)*sin(th1);
    th2=theta(iesp+1);
    x2=tasaexito(iesp+1)*cos(th2);
    y2=tasaexito(iesp+1)*sin(th2);
    line([x1,x2],[y1,y2],'Color','k');
    
    delta=0.1;
    x1=x1-delta/2;
    y1=y1-delta/2;
    rectangle('Position',[x1,y1,delta,delta],'Curvature',[1,1],...
        'FaceColor',color);

    if xf>0
        ha='Left';
    elseif xf<0
        ha='Right';
    else
        ha='Center';
    end
    rt=1.1;
    xt=rt*cos(th1);
    yt=rt*sin(th1);
    label=[PATRON.NombreEspecie{iesp},' (',num2str(100*tasaexito(iesp),'%3.0f'),'%)'];
    text(xt,yt,label,'HorizontalAlignment',ha,'Color',color);
end
axis([-2,2,-1,1]);

exitoglobal=sum(nexito)/nfich*100;
title(['Exito global: ',num2str(exitoglobal,'%6.2f'),'%']);


%
% Función que traza el gráfico de la probabilidad de éxito 
%   de una técnica de clasificación
%
function GraficaProbExito(ClaseSonido,SonidoReal,ProbClase)
% ClaseSonido: Clase de sonido predicho de cada fichero excel (vector)
% SonidoReal: Clase de sonido real de cada fichero excel (vector)
% ProbClase: Probabilidad de la clase de sonido real de cada fichero excel (vector)

global PATRON;

nesp=PATRON.nesp;
nfichesp=zeros(1,nesp);
nfich=length(SonidoReal);
for ifich=1:nfich
    nfichesp(ClaseSonido(ifich))=nfichesp(ClaseSonido(ifich))+1;
end

% Dibujo de los áreas de cada especie real
xi=0;
for iesp=1:nesp
    if iesp~=nesp
        xf=sum(nfichesp(1:iesp))+0.5;
    else
        xf=sum(nfichesp(1:iesp))+1;
    end
    color=PATRON.coloresp(iesp,:);
    patch([xi,xf,xf,xi],[0,0,100,100],color,...
        'EdgeColor','none','FaceColor',color,'FaceAlpha',0.2);
    hold on;
    xi=xf;
end
axis([0,nfich+1,0,100]);

% Dibujo de la decisión de cada fichero
for ifich=1:nfich
    color=PATRON.coloresp(SonidoReal(ifich),:);
    plot([ifich,ifich],[0,100*ProbClase(ifich)],'Color',color);
    hold on;
    plot([ifich,ifich],100*[ProbClase(ifich),ProbClase(ifich)],'.','Color',color);
end %del fichero


%
% Función que traza el gráfico de comparación de número de Frames
%
function GraficaCompNumFrame(FicheroResumen)
% FicheroResumen: Nombre del fichero con el resumen de cálculos

global PATRON;
global ParUtil; %Parámetros utilizados
global NumPar NumFrames;

nesp=PATRON.nesp;
[~,~,hoja]=xlsread(FicheroResumen,1);
nfil=size(hoja,1);
ncol=size(hoja,2);
MatClas=cell2mat(hoja(2:nfil,2:ncol));
nfich=nfil-1;
nalg=(ncol-2)/2;
ClaseReal=MatClas(:,1);
%nomAlg={'ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','DisMin','MaxVer','HMM','NeuRec'};
nomAlg={'DecTr','kNN','SVM','LogReg','Neur','Discr','Bayes','MinDis','MaxLik','HMM','RecNN'};
ordalg=[8,9,1:7,10,11];
Merito=zeros(NumFrames,nalg);
for inumframes=1:NumFrames
    [~,~,hoja]=xlsread(FicheroResumen,inumframes);
    MatClas=cell2mat(hoja(2:nfil,2:ncol));
    nfichesp=zeros(nesp,1);
    nexito=zeros(nesp,nalg);
    %Cálculo del número de aciertos
    for ifich=1:nfich
        nfichesp(ClaseReal(ifich))=nfichesp(ClaseReal(ifich))+1;
        for ialg=1:nalg
            colalg=2*ialg;
            ClasePred=MatClas(:,colalg);
            if ClasePred(ifich)==ClaseReal(ifich)
                nexito(ClaseReal(ifich),ialg)=nexito(ClaseReal(ifich),ialg)+1;
            end
        end
    end
    %Cálculo de las tasas de éxito
    TasaExito=zeros(nesp,nalg);
    TasaExitoGlobal=zeros(1,nalg);
    RangoExitoGlobal=zeros(1,nalg);
    for ialg=1:nalg
        TasaExito(:,ialg)=100*nexito(:,ialg)./nfichesp;
        TasaExitoGlobal(ialg)=100*sum(nexito(:,ialg))./nfich;
        RangoExitoGlobal(ialg)=max(TasaExito(:,ialg))-min(TasaExito(:,ialg));
        x=100-TasaExitoGlobal(ialg);
        y=RangoExitoGlobal(ialg);
        Merito(inumframes,ialg)=sqrt(x^2+y^2);
    end
end %del número de parámetros

%
% Gráfico de la tasa de éxito de cada algoritmo
MarkerVec=['+','o','*','s','d','p'];
xText=cell(nalg,1);
for ix=1:nalg
    ialg=ordalg(ix);
    xText(ix)=nomAlg(ialg);
    imarker=rem(ix-1,length(MarkerVec))+1;
    plot(1:NumFrames,Merito(:,ialg),...
        'Marker',MarkerVec(imarker));
    hold all;
end
legend(xText,'Location','BestOutside');

figure;
plot(1:NumFrames,mean(Merito,2));
hold all;
plot(1:NumFrames,min(Merito,[],2));
plot(1:NumFrames,max(Merito,[],2));


%
% Función que traza el gráfico de comparación del ROC vs. número de Frames
%
function GraficaROCNumFrame(FicheroResumen)
% FicheroResumen: Nombre del fichero con el resumen de cálculos

global PATRON;
global ParUtil; %Parámetros utilizados
global NumPar NumFrames;

nomAlg={'DisMin','MaxVer','ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','HMM','RNN'};
nalg=length(nomAlg);
ROC=zeros(NumFrames,nalg);
F1=zeros(NumFrames,nalg);
%Cálculo de la evaluación ROC
for inumframes=1:NumFrames
    [~,~,hoja]=xlsread(FicheroResumen,inumframes);
    performance=EvaluacionClasificacion(hoja);
    %TPR=performance(:,4);
    %FPR=1-performance(:,5);
    %ROC(inumframes,:)=100*sqrt( FPR.^2 + (1-TPR).^2 ) / sqrt(2);
    PRC=performance(:,3);
    SNS=performance(:,4);
    SPC=1-performance(:,5);
    ROC(inumframes,:)=100*sqrt( SNS.^2 + SPC.^2 ) / sqrt(2);
    F1(inumframes,:)= 200* (PRC.*SNS) ./ (PRC+SNS); %F1 score
end %del número de parámetros

%
% Gráfico de la tasa de éxito de cada algoritmo
MarkerVec=['+','o','*','s','d','p'];
xText=cell(nalg,1);
%par = ROC;
par = F1;
for ialg=1:nalg
    xText(ialg)=nomAlg(ialg);
    imarker=rem(ialg-1,length(MarkerVec))+1;
    plot(1:NumFrames,par(:,ialg),...
        'Marker',MarkerVec(imarker));
    hold all;
end
%xlabel('Tamaño de la ventana');
xlabel('Window size');
%ylabel('ROC (%)');
ylabel('F_1 (%)');
legend(xText,'Location','BestOutside');

figure;
%par = ROC;
par = F1;
plot(1:NumFrames,max(par,[],2));
hold all;
plot(1:NumFrames,mean(par,2));
algopt=3; %Algoritmo óptimo 
%plot(1:NumFrames,par(:,algopt));
plot(1:NumFrames,min(par,[],2));
xlabel('Window size');
%ylabel('ROC (%)');
ylabel('F_1 (%)');
%leyenda=[{'Max'},{'Mean'},{'Best'},{'Min'}];
leyenda=[{'Max'},{'Mean'},{'Min'}];
legend(leyenda,'Location','SouthEast');
ylim([0,100]);

figure;
algopt=3;
%dminROC=min(ROC,[],2)-min(ROC(1,:));
dminROC=ROC(:,algopt)-ROC(1,algopt);
plot(1:NumFrames,dminROC);
%xlabel('Tamaño de la ventana');
%ylabel('\Delta ROC (%)');
xlabel('Window size');
ylabel('\Delta ROC (%)');


%
% Función que traza el gráfico de comparación del ROC vs. número de Frames
%       Para las RNN
%
function GraficaROCNumFrameRNN(FicheroResumen)
% FicheroResumen: Nombre del fichero con el resumen de cálculos

global PATRON;
global ParUtil; %Parámetros utilizados
global NumPar NumFrames;

nomAlg={'DisMin','MaxVer','ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','HMM','RNN'};
nalg=11;
ROC=zeros(NumFrames,nalg);
F1=zeros(NumFrames,nalg);
%Cálculo de la evaluación ROC
for inumframes=1:NumFrames
    [~,~,hoja]=xlsread(FicheroResumen,inumframes);
    performance=EvaluacionClasificacion(hoja);
    PRC=performance(:,3);
    SNS=performance(:,4);
    SPC=1-performance(:,5);
    ROC(inumframes,:)=100*sqrt( SNS.^2 + SPC.^2 ) / sqrt(2);
    F1(inumframes,:)= 200* (PRC.*SNS) ./ (PRC+SNS); %F1 score
end %del número de parámetros

%
% Gráfico de la tasa de éxito de cada algoritmo
MarkerVec=['+','o','*','s','d','p'];
%par = ROC;
par = F1;
for ialg=nalg
    xText=nomAlg(ialg);
    imarker=rem(ialg-1,length(MarkerVec))+1;
    plot(1:NumFrames,par(:,ialg),...
        'Marker',MarkerVec(imarker));
    hold all;
end
%xlabel('Tamaño de la ventana');
xlabel('Number of frames');
%ylabel('ROC (%)');
ylabel('F_1 (%)');
title(xText);
ylim([0,100]);


%
% Función que traza el gráfico de comparación de algoritmos
%
function GraficaCompAlg(hoja,cabecera)
% hoja: Hoja con datos de clase de sonido para cada fichero excel y cada algoritmo
% cabecera: Cabecera de las columnas de las hojas (vector)

global PATRON;

nesp=PATRON.nesp;
tamhoja=size(hoja);
nfil=tamhoja(1);
ncol=tamhoja(2);
nfich=nfil;
nalg=(ncol-1)/2;
ClaseReal=hoja(:,1);
nfichesp=zeros(nesp,1);
nexito=zeros(nesp,nalg);
%nomAlg={'ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','DisMin','MaxVer','HMM','NeuRec'};
nomAlg={'DecTr','kNN','SVM','LogReg','Neur','Discr','Bayes','MinDis','MaxLik','HMM','RecNN'};
ordalg=[8,9,1:7,10,11];
for ifich=1:nfich
    nfichesp(ClaseReal(ifich))=nfichesp(ClaseReal(ifich))+1;
    for ix=1:nalg
        ialg=ordalg(ix);
        colalg=2*ialg;
        ClasePred=hoja(:,colalg);
        if ClasePred(ifich)==ClaseReal(ifich)
            nexito(ClaseReal(ifich),ix)=nexito(ClaseReal(ifich),ix)+1;
        end
    end
end

TasaExito=zeros(nesp,nalg);
TasaExitoGlobal=zeros(1,nalg);
RangoExitoGlobal=zeros(1,nalg);
xText=cell(nalg,1);
for ix=1:nalg
    TasaExito(:,ix)=100*nexito(:,ix)./nfichesp;
    TasaExitoGlobal(ix)=100*sum(nexito(:,ix))./nfich;
    RangoExitoGlobal(ix)=max(TasaExito(:,ix))-min(TasaExito(:,ix));
    xText{ix}=nomAlg{ordalg(ix)};
end

%
% Gráfico de la tasa de éxito de cada algoritmo
bar(1:nalg,TasaExitoGlobal,'c');
hold all;
for iesp=1:nesp
    plot(1:nalg,TasaExito(iesp,:),'*');
end
axis([0,nalg+1,0,110]);
set(gca,'XTickLabel',xText);
ylabel('Tasa de éxito');
%}

% Gráfico de la tasa de éxito vs. Rango de éxito
figure;
MarkerVec=['+','o','*','s','d','p'];
for ialg=1:nalg
    imarker=rem(ialg-1,length(MarkerVec))+1;
    plot(100-TasaExitoGlobal(ialg),RangoExitoGlobal(ialg),...
        'Marker',MarkerVec(imarker),'LineStyle','none');
    hold all;
end
legend(xText,'Location','Best');
xlabel('Tasa de error');
ylabel('Rango de tasa de error');
axis([0,100,0,100]);


%
% Obtiene los nombres de los ficheros Excel de una carpeta
%
function NombreFichero=ObtieneFicherosExcelCarpeta(carpeta)
% carpeta: nombre de la carpeta a analizar
% NombreFichero: nombre de los ficheros (vector de celdas)

dirant=cd;
cd(carpeta);
directorio=dir;
cd(dirant);

nficheros=length(directorio);
for i=1:nficheros
    nombres{i}=directorio(i).name;
end

[NombresOrd,iNombresOrd]=sort(nombres);
nfich=0;
for i=3:nficheros % Evita el '.' y el '..'
    j=iNombresOrd(i);
    NombreCompleto=directorio(j).name;
    longnom=length(NombreCompleto);
    idSon=NombreCompleto(1:3);
    switch idSon
        case 'A1-'
            idSonCorrecto=1;
        case 'A2-'
            idSonCorrecto=1;
        case 'A3-'
            idSonCorrecto=1;
        case 'B1-'
            idSonCorrecto=1;
        case 'B2-'
            idSonCorrecto=1;
        otherwise
            idSonCorrecto=0;
    end
    for k=longnom:-1:1
        if NombreCompleto(k)=='.'
            extension=NombreCompleto(k+1:longnom);
            break;
        end
    end
    if idSonCorrecto && strcmp(extension,'xls')
        nfich=nfich+1;
        NombreFichero{nfich}=[carpeta,NombreCompleto(1:k-1)];
    end
end


%
% Función que define las variables generales del programa
%
function DefineVariablesGenerales

global GENERAL; % Estructura de datos generales
    %GENERAL.hopSize: Duración de un "salto" de tiempo en el espectrograma (MP7)
    %GENERAL.msegSize: Duración de un microsegmento
    %GENERAL.frPrelev: Frecuencias de la banda relevante de potencias [baja,alta]
    %GENERAL.nForm: Número máximo de formantes considerados
    %GENERAL.NomParFrame: Nombre de los parámetros de un frame
    %GENERAL.NomCorParFrame: Nombre corte de los parámetros de un frame
    %GENERAL.ParSigEsp: Nombre de los parámetros significativos para determinar una especie en un frame
    %GENERAL.NumParPrimarios: Número de parámetros de frame primarios
    %GENERAL.NumParSecundarios: Número de parámetros de frame secundarios
    %GENERAL.NumParTerciarios: Número de parámetros de frame terciarios
    %GENERAL.gradoLPC: grado del polinomio usado en el algoritmo LPC
    %GENERAL.frFilt: Frecuencias de corte del filtr de preprocesamiento[baja,alta]
global PATRON;
    

GENERAL.hopSize=0.01; %Duración de un "salto" de tiempo en el espectrograma (MP7)
GENERAL.msegSize=0.1; %Duración de un microsegmento
GENERAL.frPrelev=[500,5000]; %Frecuencias de la banda relevante de potencias
GENERAL.nForm=3; %Número de formantes
GENERAL.gradoLPC=30; %Grado del polinomio usado en el algoritmo LPC
GENERAL.frFilt=[300,10000]; %Frecuencias de corte del filtro de preprocesamiento[baja,alta]

nesp=PATRON.nesp;
nform=GENERAL.nForm;
i=0;
i=i+1; GENERAL.NomParFrame{i}='Pt';
i=i+1; GENERAL.NomParFrame{i}='PRelev';
i=i+1; GENERAL.NomParFrame{i}='CP';
i=i+1; GENERAL.NomParFrame{i}='DE';
i=i+1; GENERAL.NomParFrame{i}='Pl';
i=i+1; GENERAL.NomParFrame{i}='Pitch';
i=i+1; GENERAL.NomParFrame{i}='Ra';
i=i+1; GENERAL.NomParFrame{i}='Fla';
for k=1:nform
    i=i+1; GENERAL.NomParFrame{i}=['FreqForm',num2str(k)];
    i=i+1; GENERAL.NomParFrame{i}=['AbForm',num2str(k)];
end
i=i+1; GENERAL.NomParFrame{i}='CA';
i=i+1; GENERAL.NomParFrame{i}='DeA';
i=i+1; GENERAL.NomParFrame{i}='DiA';
i=i+1; GENERAL.NomParFrame{i}='VA';
GENERAL.NumParPrimarios=i;

i=i+1; GENERAL.NomParFrame{i}='PtSigma';
i=i+1; GENERAL.NomParFrame{i}='PRelevSigma';
i=i+1; GENERAL.NomParFrame{i}='CPSigma';
i=i+1; GENERAL.NomParFrame{i}='DESigma';
i=i+1; GENERAL.NomParFrame{i}='PlSigma';
i=i+1; GENERAL.NomParFrame{i}='PitchSigma';
i=i+1; GENERAL.NomParFrame{i}='RaSigma';
i=i+1; GENERAL.NomParFrame{i}='FlaSigma';
for k=1:nform
    i=i+1; GENERAL.NomParFrame{i}=['FreqFormSigma',num2str(k)];
    i=i+1; GENERAL.NomParFrame{i}=['AbFormSigma',num2str(k)];
end
i=i+1; GENERAL.NomParFrame{i}='CASigma';
i=i+1; GENERAL.NomParFrame{i}='DeASigma';
i=i+1; GENERAL.NomParFrame{i}='DiASigma';
i=i+1; GENERAL.NomParFrame{i}='VASigma';
GENERAL.NumParSecundarios=i-GENERAL.NumParPrimarios;

for k=1:nesp
    i=i+1; GENERAL.NomParFrame{i}=['VrEsp',num2str(k)];
    i=i+1; GENERAL.NomParFrame{i}=['DisEsp',num2str(k)];
end
i=i+1; GENERAL.NomParFrame{i}='Relev';
GENERAL.NumParTerciarios=i-GENERAL.NumParPrimarios-GENERAL.NumParSecundarios;

i=0;
i=i+1; GENERAL.NomCorParFrame{i}='Pt';
i=i+1; GENERAL.NomCorParFrame{i}='PR';
i=i+1; GENERAL.NomCorParFrame{i}='CP';
i=i+1; GENERAL.NomCorParFrame{i}='DE';
i=i+1; GENERAL.NomCorParFrame{i}='Pl';
i=i+1; GENERAL.NomCorParFrame{i}='Pitch';
i=i+1; GENERAL.NomCorParFrame{i}='Ra';
i=i+1; GENERAL.NomCorParFrame{i}='Fla';
for k=1:nform
    i=i+1; GENERAL.NomCorParFrame{i}=['FrF',num2str(k)];
    i=i+1; GENERAL.NomCorParFrame{i}=['AbF',num2str(k)];
end
i=i+1; GENERAL.NomCorParFrame{i}='CA';
i=i+1; GENERAL.NomCorParFrame{i}='DeA';
i=i+1; GENERAL.NomCorParFrame{i}='DiA';
i=i+1; GENERAL.NomCorParFrame{i}='VA';

i=i+1; GENERAL.NomCorParFrame{i}='Pt~';
i=i+1; GENERAL.NomCorParFrame{i}='PR~';
i=i+1; GENERAL.NomCorParFrame{i}='CP~';
i=i+1; GENERAL.NomCorParFrame{i}='DE~';
i=i+1; GENERAL.NomCorParFrame{i}='Pl~';
i=i+1; GENERAL.NomCorParFrame{i}='Pitch~';
i=i+1; GENERAL.NomCorParFrame{i}='Ra~';
i=i+1; GENERAL.NomCorParFrame{i}='Fla~';
for k=1:nform
    i=i+1; GENERAL.NomCorParFrame{i}=['FF~',num2str(k)];
    i=i+1; GENERAL.NomCorParFrame{i}=['AB~',num2str(k)];
end
i=i+1; GENERAL.NomCorParFrame{i}='CA~';
i=i+1; GENERAL.NomCorParFrame{i}='DeA~';
i=i+1; GENERAL.NomCorParFrame{i}='DiA~';
i=i+1; GENERAL.NomCorParFrame{i}='VA~';

for k=1:nesp
    i=i+1; GENERAL.NomCorParFrame{i}=['VrE',num2str(k)];
    i=i+1; GENERAL.NomCorParFrame{i}=['DiE',num2str(k)];
end
i=i+1; GENERAL.NomCorParFrame{i}='Relev';


%
% Función que define los patrones
%
function [FicheroPatron,roiPatron,FicheroResumenPatrones]=DefinePatron
% FicheroPatron: Nombre de los ficheros Patrón (celda de mxn)
% roiPatron: Regiones de Interés (ROI) de los ficheros Patrón (celda de mxn)
% FicheroResumenPatrones: Nombre del fichero resumen de patrones (global PATRON)

global PATRON; % Estructura de datos de patrones
% Datos de cada patron de especie (se aproxima por una mezcla de N gaussianas)
    %PATRON.nesp: Número de especies en los patrones
    %PATRON.NombreEspecie: Nombre de las especies en los patrones
    %PATRON.coloresp: Color usado para las especies (matriz [tipo región,especie])
    %PATRON.ng: Número de gaussianas en la mezcla de funciones de distribución
    %PATRON.nt: Número de frames en los patrones (matriz [especie,muestra])
   

PATRON.nesp=3;
PATRON.NombreEspecie{1}='Sapo corredor';
PATRON.NombreEspecie{2}='Sapo corredor: suelta';
PATRON.NombreEspecie{3}='Sapo partero común';
PATRON.NombreEspecieLatin{1}='Epidalea Calamita';
PATRON.NombreEspecieLatin{2}='Epidalea Calamita: suelta';
PATRON.NombreEspecieLatin{3}='Alytes obstetricans';
PATRON.ng=2;
tablaColores=[0.00,0.00,1.00;...
              0.00,0.50,0.00;...
              1.00,0.00,0.00;...
              0.75,0.00,0.75];
PATRON.coloresp=tablaColores(1:PATRON.nesp+1,:);
FicheroResumenPatrones='sonidos\Procesados\Patrones\IANTA-PATRON';
FicheroPatron=cell(1,PATRON.nesp);
FicheroPatron{1}={'sonidos\Procesados\Patrones\0501_0-10',...
                  'sonidos\Procesados\Patrones\0707_0-10'};
FicheroPatron{2}={'sonidos\Procesados\Patrones\0503'};
FicheroPatron{3}={'sonidos\Procesados\Patrones\1325',...
                  'sonidos\Procesados\Patrones\1330'};

roiPatron=cell(1,PATRON.nesp);
roiPatron{1}={[  21,  78;
                127, 231;
                264, 366;
                395, 489;
                523, 628;
                664, 759;
                798, 900;
                933,1035],...
              [  20,  44;
                 73, 127;
                150, 194;
                221, 270;
                294, 339;
                362, 411;
                435, 484;
                507, 545;
                569, 611;
                635, 679;
                704, 747;
                771, 821;
                845, 887;
                910, 950;
                976,1018]};

roiPatron{2}={[  42,  58;
                128, 136;
                377, 388;
                745, 753;
                795, 812;
               1019,1034;
               1345,1356;
               1403,1418;
               1463,1479;
               1570,1577;
               1632,1642;
               1858,1865;
               1915,1925;
               2241,2251;
               2303,2313;
               2366,2380;
               2518,2531;
               2574,2582;
               2810,2817;
               2858,2873]};
              
roiPatron{3}={[  73,  89;
                150, 164;
                497, 513;
                876, 892;
               1433,1450;
               1808,1825;
               2206,2222;
               2556,2572;
               2933,2948;
               3406,3422;
               3677,3699;
               3721,3737],...
              [ 182, 194;
                263, 277;
                696, 711;
               1226,1239;
               1737,1751;
               2374,2388;
               2919,2934;
               3436,3451;
               4001,4015;
               4487,4501;
               4990,5005]};


%
% Función que convierte de la clase de un sonido de texto a número
%
function ClaseNum=ClaseTxt2Num(ClaseTxt)
% ClaseTxt: Clase de sonido en formato texto (vector)
% ClaseNum: Clase de sonido en formato número (vector)

global PATRON;
nesp=PATRON.nesp;
nelem=length(ClaseTxt);
ClaseNum=(nesp+1)*ones(nelem,1);
for k=1:nelem
    for iesp=1:nesp
        if strcmp(ClaseTxt(k),PATRON.NombreEspecie{iesp});
            ClaseNum(k)=iesp;
        end
    end
end

%----------------  Vectorización

function [x,esq,j] = kmeanlbg(d,k)
%KMEANLBG Vector quantisation using the Linde-Buzo-Gray algorithm [X,ESQ,J]=(D,K)
%
%Inputs:
% D contains data vectors (one per row)
% K is number of centres required
%
%Outputs:
% X is output row vectors (K rows)
% ESQ is mean square error
% J indicates which centre each data vector belongs to
%
%  Implements LBG K-means algorithm:
% Linde, Y., A. Buzo, and R. M. Gray,
% "An Algorithm for vector quantiser design,"
% IEEE Trans Communications, vol. 28, pp.84-95, Jan 1980.


%      Copyright (C) Mike Brookes 1998
%      Version: $Id: kmeanlbg.m 4497 2014-04-23 10:28:55Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nc=size(d,2);
[x,esq,j]=v_kmeans(d,1);
m=1;
while m<k
    n=min(m,k-m);
    m=m+n;
    e=1e-4*sqrt(esq)*rand(1,nc);
    [x,esq,j]=v_kmeans(d,m,[x(1:n,:)+e(ones(n,1),:); x(1:n,:)-e(ones(n,1),:); x(n+1:m-n,:)]);
end


function [x,g,j,gg] = v_kmeans(d,k,x0,l)
%V_KMEANS Vector quantisation using K-means algorithm [X,ESQ,J]=(D,K,X0,L)
%
%  Inputs:
%
%    D(N,P)  contains N data vectors of dimension P
%    K       is number of centres required
%    X0(K,P) are the initial centres (optional)
%
%      or alternatively
%
%    X0      gives the initialization method
%            'f'   pick K random elements of D as the initial centres [default]
%            'p'   randomly divide D into K sets and choose the centroids
%    L       gives max number of iterations (use 0 if you just want to calculate G and J)
%
%  Outputs:
%
%    X(K,P)  is output row vectors (omitted if L=0)
%    G       is mean square error
%    J(N)    indicates which centre each data vector belongs to
%    GG(L)   gives the mean square error at the start of each iteration (omitted if L=0)
%
% It is often a good idea to scale the input data so that it has equal variance in each
% dimension before calling V_KMEANS.

%  Originally based on a routine by Chuck Anderson, anderson@cs.colostate.edu, 1996


%      Copyright (C) Mike Brookes 1998
%      Version: $Id: v_kmeans.m 4497 2014-04-23 10:28:55Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 2 of the License, or
%   (at your option) any later version.
%
%   This program is distributed in the hope that it will be useful,
%   but WITHOUT ANY WARRANTY; without even the implied warranty of
%   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%   GNU General Public License for more details.
%
%   You can obtain a copy of the GNU General Public License from
%   http://www.gnu.org/copyleft/gpl.html or by writing to
%   Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

memsize=50e6;         % Maximum amount of temporary memory to use (Bytes)
[n,p] = size(d);
nb=min(n,max(1,floor(memsize/(8*p*k))));    % block size for testing data points
nl=ceil(n/nb);                  % number of blocks
if nargin<4
    l=300;                  % very large max iteration count
    if nargin<3
        x0='f';             % use 'f' initialization mode
    end
end
if ischar(x0)
    if k<n
        if any(x0)=='p'                  % Initialize using a random partition
            ix=ceil(rand(1,n)*k);       % allocate to random clusters
            ix(rnsubset(k,n))=1:k;      % but force at least one point per cluster
            x=zeros(k,p);
            for i=1:k
                x(i,:)=mean(d(ix==i,:),1);
            end
        else                                % Forgy initialization: choose k random points [default]
            x=d(rnsubset(k,n),:);         % sample k centres without replacement
        end
    else
        x=d(mod((1:k)-1,n)+1,:);    % just include all points several times
    end
else
    x=x0;
end
m=zeros(n,1);           % minimum distance to a centre
j=zeros(n,1);           % index of closest centre
gg=zeros(l,1);
wp=ones(1,p);
kk=1:p;
kk=kk(ones(n,1),:);
kk=kk(:);

if l>0
    for ll=1:l                 % loop until x==y causes a break

        % find closest centre to each data point [m(:),j(:)] = distance, index

        ix=1;
        jx=n-nl*nb;
        for il=1:nl
            jx=jx+nb;        % increment upper limit
            ii=ix:jx;
            z = disteusq(d(ii,:),x,'x');
            [m(ii),j(ii)] = min(z,[],2);
            ix=jx+1;
        end
        y = x;              % save old centre list

        % calculate new centres as the mean of their assigned data values (or zero for unused centres)

        nd=full(sparse(j,1,1,k,1));         % number of points allocated to each centre
        md=max(nd,1);                       % remove zeros
        jj=j(:,wp);
        x=full(sparse(jj(:),kk,d(:),k,p))./md(:,wp);    % calculate the new means
        fx=find(nd==0);

        % if any centres are unused, assign them to data values that are not exactly on centres
        % choose randomly if there are more such points than needed

        if ~isempty(fx)
            q=find(m~=0);
            if length(q)<=length(fx)
                x(fx(1:length(q)),:)=d(q,:);
            else
                if length(fx)>1
                    [rr,ri]=sort(rand(length(q),1));
                    x(fx,:)=d(q(ri(1:length(fx))),:);
                else
                    x(fx,:) = d(q(ceil(rand(1)*length(q))),:);
                end
            end
        end

        % quit if the centres are unchanged

        gg(ll)=sum(m,1);
        if x==y
            break
        end
    end
    gg=gg(1:ll)/n;
%     ll % *** DEBUG ***
%     gg' % *** DEBUG ***
    g=gg(end);
else            % if l==0 then just calculate G and J (but rename as X and G)
    ix=1;
    jx=n-nl*nb;
    for il=1:nl
        jx=jx+nb;        % increment upper limit
        ii=ix:jx;
        z = disteusq(d(ii,:),x,'x');
        [m(ii),j(ii)] = min(z,[],2);
        ix=jx+1;
    end
    x=sum(m,1)/n;
    g=j;
end


function d=disteusq(x,y,mode,w)
%DISTEUSQ calculate euclidean, squared euclidean or mahanalobis distance D=(X,Y,MODE,W)
%
% Inputs: X,Y         Vector sets to be compared. Each row contains a data vector.
%                     X and Y must have the same number of columns.
%
%         MODE        Character string selecting the following options:
%                         'x'  Calculate the full distance matrix from every row of X to every row of Y
%                         'd'  Calculate only the distance between corresponding rows of X and Y
%                              The default is 'd' if X and Y have the same number of rows otherwise 'x'.
%                         's'  take the square-root of the result to give the euclidean distance.
%
%         W           Optional weighting matrix: the distance calculated is (x-y)*W*(x-y)'
%                     If W is a vector, then the matrix diag(W) is used.
%
% Output: D           If MODE='d' then D is a column vector with the same number of rows as the shorter of X and Y.
%                     If MODE='x' then D is a matrix with the same number of rows as X and the same number of columns as Y'.
%

%      Copyright (C) Mike Brookes 1998
%      Version: $Id: disteusq.m 713 2011-10-16 14:45:43Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

[nx,p]=size(x);
ny=size(y,1);
if nargin<3 | isempty(mode) 
    mode='0'; 
end
if any(mode=='d') | (mode~='x' & nx==ny)

    % Do pairwise distance calculation
    nx=min(nx,ny);
    z=double(x(1:nx,:))-double(y(1:nx,:));
    if nargin<4
        d=sum(z.*conj(z),2);
    elseif min(size(w))==1
        wv=w(:).';
        d=sum(z.*wv(ones(size(z,1),1),:).*conj(z),2);
    else
        d=sum(z*w.*conj(z),2);
    end
else

    % Calculate full distance matrix

    if p>1

        % x and y are matrices

        if nargin<4
            z=permute(double(x(:,:,ones(1,ny))),[1 3 2])-permute(double(y(:,:,ones(1,nx))),[3 1 2]);
            d=sum(z.*conj(z),3);
        else
            nxy=nx*ny;
            z=reshape(permute(double(x(:,:,ones(1,ny))),[1 3 2])-permute(double(y(:,:,ones(1,nx))),[3 1 2]),nxy,p);
            if min(size(w))==1
                wv=w(:).';
                d=reshape(sum(z.*wv(ones(nxy,1),:).*conj(z),2),nx,ny);
            else
                d=reshape(sum(z*w.*conj(z),2),nx,ny);
            end
        end
    else

        % x and y are vectors

        z=double(x(:,ones(1,ny)))-double(y(:,ones(1,nx))).';
        if nargin<4
            d=z.*conj(z);
        else
            d=w*z.*conj(z);
        end
    end
end
if any(mode=='s')
    d=sqrt(d);
end

function m = rnsubset(k,n)
%RNSUBSET choose k distinct random integers from 1:n M=(K,N)
%
%  Inputs:
%
%    K is number of disinct integers required from the range 1:N
%    N specifies the range - we must have K<=N
%
%  Outputs:
%
%    M(1,K) contains the output numbers

%      Copyright (C) Mike Brookes 2006
%      Version: $Id: rnsubset.m 713 2011-10-16 14:45:43Z dmb $
%
%   VOICEBOX is a MATLAB toolbox for speech processing.
%   Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if k>n
    error('rnsubset: k must be <= n');
end
% We use two algorithms according to the values of k and n
[f,e]=log2(n);
if k>0.03*n*(e-1)
    [v,m]=sort(rand(1,n)); % for large k, just do a random permutation
else
    v=ceil(rand(1,k).*(n:-1:n-k+1));
    m=1:n;
    for i=1:k
        j=v(i)+i-1;
        x=m(i);
        m(i)=m(j);
        m(j)=x;
    end
end
m=m(1:k);


%
% Función que calcula la matriz de confusion de una técnica de clasificación
%
function MatrizConfusion(hoja,algoritmo)
% hoja: Hoja con datos de clase de sonido para cada fichero excel y cada algoritmo
% algoritmo: identificador del algoritmo del que se obtiene la matriz de confusión

global PATRON;
tamhoja=size(hoja);
nfil=tamhoja(1);
ncol=tamhoja(2);
MatClas=cell2mat(hoja(2:nfil,2:ncol));
SonidoReal=MatClas(:,1);
ClaseSonido=MatClas(:,2*algoritmo);

nesp=PATRON.nesp;

% Cálculo de la matriz de confusión (valores absolutos)
confusion=confusionmat(SonidoReal,ClaseSonido);
nfichesp=sum(confusion,2);
display(confusion);

% Cálculo de la matriz de confusión (porcentajes)
for iesp=1:nesp
    confusion(iesp,:)=confusion(iesp,:) / nfichesp(iesp)*100;
end
display(confusion);


%
% Función que calcula la evaluación de las técnicas de clasificación
%
function performance=EvaluacionClasificacion(hoja)
% hoja: Hoja con datos de clase de sonido para cada fichero excel y cada algoritmo
% performance: matriz de prestaciones (algoritmo,indicador)
%               1: Accuracy
%               2: Error Rate
%               3: Precision
%               4: Sensitivity
%               5: Specificity

tamhoja=size(hoja);
nfil=tamhoja(1);
ncol=tamhoja(2);
MatClas=cell2mat(hoja(2:nfil,2:ncol));
SonidoReal=MatClas(:,1);

%nalg=(ncol-2)/2;
%nomAlg={'ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','DisMin','MaxVer','HMM','NeuRec'};
nomAlg={'DecTr','kNN','SVM','LogReg','Neur','Discr','Bayes','MinDis','MaxLik','HMM','RecNN'};
ordalg=[8,9,1:7,10,11];
nalg=length(ordalg);
RowText=cell(nalg,1);
performance=zeros(nalg,5);
for ix=1:nalg
    ialg=ordalg(ix);
    ClaseSonido=MatClas(:,2*ialg);
    RowText{ix}=nomAlg{ialg};

    % Cálculo de la matriz de confusión (valores absolutos)
    confusion=confusionmat(SonidoReal,ClaseSonido);
    P=sum(confusion,2); %Positive
    TP=diag(confusion); %True Positive
    FN=P-TP; %False Negative

    PP=sum(confusion,1)'; %Predicted Positive
    FP=PP-TP; %False Positive
    N=sum(P)-P; %Negative
    TN=N-FP; %True Negative

    % Prestaciones para cada clase
    ACCx=(TP+TN)./(TP+TN+FP+FN); %Accuracy
    ERRx=(FP+FN)./(TP+TN+FP+FN); %Error Rate
    PRCx=TP./(TP+FP); %Precision
    SNSx=TP./(TP+FN); %Sensitivity
    SPCx=TN./(FP+TN); %Specificity
    PRCx(isnan(PRCx)) = 0; %Elimina NaN

    % Prestaciones para el conjunto de las clases
    ACCm = mean(ACCx);
    ERRm = mean(ERRx);
    PRCm = mean(PRCx);
    SNSm = mean(SNSx);
    SPCm = mean(SPCx);
    ROCm = sqrt( SNSm.^2 + SPCm.^2 ) / sqrt(2); %ROC
    F1m = 2* (PRCm.*SNSm) ./ (PRCm+SNSm); %F1 score

    performance(ix,1)=ACCm; %Accuracy
    performance(ix,2)=ERRm; %Error Rate
    performance(ix,3)=PRCm; %Precision
    performance(ix,4)=SNSm; %Sensitivity
    performance(ix,5)=SPCm; %Specificity
    performance(ix,6)=ROCm; %ROC
    performance(ix,7)=F1m; %F1 score
end
ACC = performance(:,1);
ERR = performance(:,2);
PRC = performance(:,3);
SNS = performance(:,4);
SPC = performance(:,5);
ROC = performance(:,6);
F1 = performance(:,7);
T = table(ACC,ERR,PRC,SNS,SPC,ROC,F1,'RowNames',RowText);
disp(T);
kk=1;


%
% Función que traza el gráfico ROC de comparación de algoritmos
%   ROC: Receiver Operating Characteristic
%
function GraficaROC(hoja)
% hoja: Hoja con datos de clase de sonido para cada fichero excel y cada algoritmo

%nomAlg={'ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','DisMin','MaxVer','HMM','NeuRec'};
nomAlg={'DecTr','kNN','SVM','LogReg','Neur','Discr','Bayes','MinDis','MaxLik','HMM','RecNN'};
ordalg=[8,9,1:7,10,11];
MarkerVec=['+','o','*','s','d','p'];
performance=EvaluacionClasificacion(hoja);
nalg=length(nomAlg);
xText=cell(nalg,1);
for ialg=1:nalg
    xText{ialg}=nomAlg{ordalg(ialg)};
    %xText{ialg}=nomAlg{ialg};
    imarker=rem(ialg-1,length(MarkerVec))+1;
    x=100*(1-performance(ialg,5)); %Fall-out (1-Specificity)=False Positive Rate
    y=100*performance(ialg,4); %Sensitivity=True Positive Rate
    plot(x,y,...
        'Marker',MarkerVec(imarker),'LineStyle','none');
    hold all;
end
plot([0,100],[0,100],':k');
legend(xText,'Location','NorthEast');
xlabel('False Positive Rate (%)');
ylabel('True Positive Rate (%)');
axis([0,100,0,100]);


%
% Función que realiza la comparación de algoritmos mediante el coeficiente kappa de Cohen
%
function CohenKappaAnalysis(hoja)
% hoja: Hoja con datos de clase de sonido para cada fichero excel y cada algoritmo

global PATRON;

nclases=PATRON.nesp;
tamhoja=size(hoja);
nfil=tamhoja(1);
ncol=tamhoja(2);
MatClas=cell2mat(hoja(2:nfil,2:ncol));
%nalg=(ncol-2)/2;

%nomAlg={'ArDec','kNN','SVM','RegLog','Neur','Discr','Bayes','DisMin','MaxVer','HMM','NeuRec'};
nomAlg={'DecTr','kNN','SVM','LogReg','Neur','Discr','Bayes','MinDis','MaxLik','HMM','RecNN'};
ordalg=[8,9,1:7,10,11];
nalg=length(nomAlg);
NomAlgOrd=cell(1,nalg);
for i=1:nalg
    NomAlgOrd{i}=nomAlg{ordalg(i)};
end

% Calcula la matriz kappa de Cohen
KappaMat=zeros(nalg,nalg);
for ix=1:nalg
    ialg=ordalg(ix);
    ClaseAlgI=MatClas(:,2*ialg);
    for jx=ix:nalg
        jalg=ordalg(jx);
        ClaseAlgJ=MatClas(:,2*jalg);
        nfich=length(ClaseAlgI);
        X=zeros(nclases,nclases);
        for ifich=1:nfich
            i=ClaseAlgI(ifich);
            j=ClaseAlgJ(ifich);
            X(i,j)=X(i,j)+1;
        end %de ifich
        [po,pe,k,sek,ci,km,ratio,var,z,p]=kappa(X);
        KappaMat(ix,jx)=min(k,1);
        KappaMat(jx,ix)=min(k,1);
    end %de jalg
end %de ialg

% Traza el gráfico de la matriz kappa
colormap summer;
mapacolor=colormap;
ncolor=size(mapacolor,1);
for x=1:nalg
    for y=1:nalg
        z=max(0,KappaMat(x,y));
        color=mapacolor(round(z*(ncolor-1)+1),:);
        rectangle('Position',[x-0.5,y-0.5,1,1],'FaceColor',color);
        ztexto=[num2str(100*z,'%2.0f'),'%'];
        text(x,y,ztexto,'HorizontalAlignment','center');
    end
end
axis([0.5,nalg+0.5,0.5,nalg+0.5]);
%----- Etiquetas del eje horizontal girados 30º
set(gca,'XTick',1:nalg,'XTickLabel','');
hx = get(gca,'XLabel');
set(hx,'Units','data'); 
pos = get(hx,'Position'); 
t=zeros(1,nalg);
for i = 1:nalg
    t(i)=text(i,pos(2),NomAlgOrd{i}); 
end 
set(t,'Rotation',30,'HorizontalAlignment','right')
%----- Etiquetas del eje vertical
set(gca,'YTick',1:nalg+1,'YTickLabel',NomAlgOrd);
colorbar('YTick',1:ncolor/4:ncolor+1,'YTickLabel',...
    {'0%','25%','50%','75%','100%'});

% Traza el gráfico de cajas de la kappa media de cada algoritmo
figure;
Kappax=KappaMat;
A=diag(ones(1,nalg));
Kappax(A==1)=nan;
boxplot(Kappax,NomAlgOrd,'labelorientation','inline');
ylabel('Cohen''s \kappa rate');


function [po,pe,k,sek,ci,km,ratio,var,z,p]=kappa(varargin)
% KAPPA: This function computes the Cohen's kappa coefficient.
% Cohen's kappa coefficient is a statistical measure of inter-rater
% reliability. It is generally thought to be a more robust measure than
% simple percent agreement calculation since k takes into account the
% agreement occurring by chance.
% Kappa provides a measure of the degree to which two judges, A and B,
% concur in their respective sortings of N items into k mutually exclusive
% categories. A 'judge' in this context can be an individual human being, a
% set of individuals who sort the N items collectively, or some non-human
% agency, such as a computer program or diagnostic test, that performs a
% sorting on the basis of specified criteria.
% The original and simplest version of kappa is the unweighted kappa
% coefficient introduced by J. Cohen in 1960. When the categories are
% merely nominal, Cohen's simple unweighted coefficient is the only form of
% kappa that can meaningfully be used. If the categories are ordinal and if
% it is the case that category 2 represents more of something than category
% 1, that category 3 represents more of that same something than category
% 2, and so on, then it is potentially meaningful to take this into
% account, weighting each cell of the matrix in accordance with how near it
% is to the cell in that row that includes the absolutely concordant items.
% This function can compute a linear weights or a quadratic weights.
%
% Syntax: [po,pe,k,sek,ci,km,ratio,var,z,p]=kappa(X,W,ALPHA)
%      
%     Inputs:
%           X - square data matrix
%           W - Weight (0 = unweighted; 1 = linear weighted; 2 = quadratic
%           weighted; -1 = display all. Default=0)
%           ALPHA - default=0.05.
%
%     Outputs:
%           po: Observed agreement
%           pe: Random agreement
%           k: Cohen's kappa
%               k<=0: Poor agreement
%               0.0<k<=0.2: Slight agreement
%               0.2<k<=0.4: Fair agreement
%               0.4<k<=0.6: Moderate agreement
%               0.6<k<=0.8 Substantial agreement
%               0.8<k<=1.0: Perfect agreement
%           sekf: kappa error
%           ci: kappa confidence interval
%           km: Maximum possible kappa, given the observed marginal frequencies
%           ratio: kappa observed as proportion of maximum possible
%           var: variance
%           z: normalized kappa
%           p: observed agreement hypotesis test
%               p<0.05: Reject null hypotesis: observed agreement is not accidental
%               p>=0.05: Accept null hypotesis: observed agreement is accidental
%
%      Example: 
%
%           x=[88 14 18; 10 40 10; 2 6 12];
%
%           Calling on Matlab the function: kappa(x)
%
%           Answer is:
%
% UNWEIGHTED COHEN'S KAPPA
% --------------------------------------------------------------------------------
% Observed agreement (po) = 0.7000
% Random agreement (pe) = 0.4100
% Agreement due to true concordance (po-pe) = 0.2900
% Residual not random agreement (1-pe) = 0.5900
% Cohen's kappa = 0.4915
% kappa error = 0.0549
% kappa C.I. (alpha = 0.0500) = 0.3839     0.5992
% Maximum possible kappa, given the observed marginal frequencies = 0.8305
% k observed as proportion of maximum possible = 0.5918
% Moderate agreement
% Variance = 0.0031     z (k/sqrt(var)) = 8.8347    p = 0.0000
% Reject null hypotesis: observed agreement is not accidental
%
%           Created by Giuseppe Cardillo
%           giuseppe.cardillo-edta@poste.it
%
% To cite this file, this would be an appropriate format:
% Cardillo G. (2007) Cohens kappa: compute the Cohen's kappa ratio on a 2x2 matrix.   
% http://www.mathworks.com/matlabcentral/fileexchange/15365



%Input Error handling

args=cell(varargin);
nu=numel(args);
if isempty(nu)
    error('Warning: Matrix of data is missed...')
elseif nu>3
    error('Warning: Max three input data are required')
end
default.values = {[],0,0.05};
default.values(1:nu) = args;
[x w alpha] = deal(default.values{:});

if isempty(x)
    error('Warning: X matrix is empty...')
end
if isvector(x)
    error('Warning: X must be a matrix not a vector')
end
if ~all(isfinite(x(:))) || ~all(isnumeric(x(:)))
    error('Warning: all X values must be numeric and finite')
end   
if ~isequal(x(:),round(x(:)))
    error('Warning: X data matrix values must be whole numbers')
end

m=size(x);
if ~isequal(m(1),m(2))
    error('Input matrix must be a square matrix')
end
if nu>1 %eventually check weight
    if ~isscalar(w) || ~isfinite(w) || ~isnumeric(w) || isempty(w)
        error('Warning: it is required a scalar, numeric and finite Weight value.')
    end
    a=-1:1:2;
    if isempty(a(a==w))%check if w is -1 0 1 or 2
        error('Warning: Weight must be -1 0 1 or 2.')
    end
end
if nu>2 %eventually check alpha
    if ~isscalar(alpha) || ~isnumeric(alpha) || ~isfinite(alpha) || isempty(alpha)
        error('Warning: it is required a numeric, finite and scalar ALPHA value.');
    end
    if alpha <= 0 || alpha >= 1 %check if alpha is between 0 and 1
        error('Warning: ALPHA must be comprised between 0 and 1.')
    end
end
clear args default nu

m(2)=[]; 
if w==0 || w==-1
    f=diag(ones(1,m)); %unweighted
    [po,pe,k,sek,ci,km,ratio,var,z,p]=kcomp(x,f,alpha);
end
if w==1 || w==-1
    J=repmat((1:1:m),m,1);
    I=flipud(rot90(J));
    f=1-abs(I-J)./(m-1); %linear weight
    [po,pe,k,sek,ci,km,ratio,var,z,p]=kcomp(x,f,alpha);
end
if w==2 || w==-1
    J=repmat((1:1:m),m,1);
    I=flipud(rot90(J));
    f=1-((I-J)./(m-1)).^2; %quadratic weight
    [po,pe,k,sek,ci,km,ratio,var,z,p]=kcomp(x,f,alpha);
end



function [po,pe,k,sek,ci,km,ratio,var,z,p]=kcomp(x,f,alpha)
%x: square data matrix
%f: square weight matrix
%alpha: ALPHA - default=0.05.
%po: Observed agreement
%pe: Random agreement
%k: Cohen's kappa
    %k<=0: Poor agreement
    %0.0<k<=0.2: Slight agreement
    %0.2<k<=0.4: Fair agreement
    %0.4<k<=0.6: Moderate agreement
    %0.6<k<=0.8 Substantial agreement
    %0.8<k<=1.0: Perfect agreement
%sekf: kappa error
%ci: kappa confidence interval
%km: Maximum possible kappa, given the observed marginal frequencies
%ratio: kappa observed as proportion of maximum possible
%var: variance
%z: normalized kappa
%p: observed agreement hypotesis test
    %p<0.05: Reject null hypotesis: observed agreement is not accidental
    %p>=0.05: Accept null hypotesis: observed agreement is accidental

m=length(x);
n=sum(x(:)); %Sum of Matrix elements
x=x./n; %proportion
r=sum(x,2); %rows sum
s=sum(x); %columns sum
Ex=r*s; %expected proportion for random agree
pom=sum(min([r';s]));
po=sum(sum(x.*f));
pe=sum(sum(Ex.*f));
k=(po-pe)/(1-pe);
km=(pom-pe)/(1-pe); %maximum possible kappa, given the observed marginal frequencies
ratio=k/km; %observed as proportion of maximum possible
sek=sqrt((po*(1-po))/(n*(1-pe)^2)); %kappa standard error for confidence interval
ci=k+([-1 1].*(abs(-realsqrt(2)*erfcinv(alpha))*sek)); %k confidence interval
wbari=r'*f;
wbarj=s*f;
wbar=repmat(wbari',1,m)+repmat(wbarj,m,1);
a=Ex.*((f-wbar).^2);
var=(sum(a(:))-pe^2)/(n*((1-pe)^2)); %variance
z=k/sqrt(var); %normalized kappa
p=(1-0.5*erfc(-abs(z)/realsqrt(2)))*2;