function [trainedClassifier, validationAccuracy] = trainClassifier(trainingData)
% [trainedClassifier, validationAccuracy] = trainClassifier(trainingData)
% Returns a trained classifier and its accuracy. This code recreates the
% classification model trained in Classification Learner app. Use the
% generated code to automate training the same model with new data, or to
% learn how to programmatically train models.
%
% Input:
%     trainingData: A table containing the same predictor and response
%      columns as those imported into the app (predictors 'a01'..'a34',
%      response 'class' with values 'b'/'g').
%
% Output:
%     trainedClassifier: A struct containing the trained classifier. The
%      struct contains various fields with information about the trained
%      classifier.
%
%     trainedClassifier.predictFcn: A function to make predictions on new
%      data.
%
%     validationAccuracy: A double containing the 5-fold cross-validation
%      accuracy as a fraction in [0, 1]. (The app's Models pane displays
%      this value as a percentage.)
%
% Use the code to train the model with new data. To retrain your
% classifier, call the function from the command line with your original
% data or new data as the input argument trainingData.
%
% For example, to retrain a classifier trained with the original data set
% T, enter:
%   [trainedClassifier, validationAccuracy] = trainClassifier(T)
%
% To make predictions with the returned 'trainedClassifier' on new data T2,
% use
%   yfit = trainedClassifier.predictFcn(T2)
%
% T2 must be a table containing at least the same predictor columns as used
% during training. For details, enter:
%   trainedClassifier.HowToPredict

% Auto-generated by MATLAB on 21-Nov-2023 19:10:35.
% NOTE(review): fixed the misspelled 'DataVariables' name-value argument of
% normalize (was 'DataVariable') in both the final-model and per-fold paths.

% Extract predictors and response
% This code processes the data into the right shape for training the model.
inputTable = trainingData;
predictorNames = {'a01', 'a02', 'a03', 'a04', 'a05', 'a06', 'a07', 'a08', 'a09', 'a10', 'a11', 'a12', 'a13', 'a14', 'a15', 'a16', 'a17', 'a18', 'a19', 'a20', 'a21', 'a22', 'a23', 'a24', 'a25', 'a26', 'a27', 'a28', 'a29', 'a30', 'a31', 'a32', 'a33', 'a34'};
predictors = inputTable(:, predictorNames);
response = inputTable.class;
% All 34 predictors are numeric (none categorical).
isCategoricalPredictor = false(1, numel(predictorNames));

% Feature Ranking and Selection
% Replace Inf/-Inf values with NaN to prepare data for normalization
predictors = standardizeMissing(predictors, {Inf, -Inf});
% Normalize data for feature ranking (only the non-categorical variables)
predictorMatrix = normalize(predictors, "DataVariables", ~isCategoricalPredictor);
% Rank features using MRMR algorithm
featureIndex = fscmrmr(...
    predictorMatrix, ...
    response);
numFeaturesToKeep = 10;
includedPredictorNames = predictors.Properties.VariableNames(featureIndex(1:numFeaturesToKeep));
predictors = predictors(:, includedPredictorNames);
isCategoricalPredictor = isCategoricalPredictor(featureIndex(1:numFeaturesToKeep));

% Train a classifier
% This code specifies all the classifier options and trains the classifier.
classificationKernel = fitckernel(...
    predictors, ...
    response, ...
    'Learner', 'svm', ...
    'NumExpansionDimensions', 'auto', ...
    'Lambda', 'auto', ...
    'KernelScale', 'auto', ...
    'IterationLimit', 1000, ...
    'ClassNames', categorical({'b'; 'g'}));

% Create the result struct with predict function. The chain reproduces the
% training pipeline at prediction time: select the required columns, then
% the MRMR-chosen subset, then run the kernel model.
predictorExtractionFcn = @(t) t(:, predictorNames);
featureSelectionFcn = @(x) x(:, includedPredictorNames);
kernelPredictFcn = @(x) predict(classificationKernel, x);
trainedClassifier.predictFcn = @(x) kernelPredictFcn(featureSelectionFcn(predictorExtractionFcn(x)));

% Add additional fields to the result struct
trainedClassifier.RequiredVariables = {'a01', 'a02', 'a03', 'a04', 'a05', 'a06', 'a07', 'a08', 'a09', 'a10', 'a11', 'a12', 'a13', 'a14', 'a15', 'a16', 'a17', 'a18', 'a19', 'a20', 'a21', 'a22', 'a23', 'a24', 'a25', 'a26', 'a27', 'a28', 'a29', 'a30', 'a31', 'a32', 'a33', 'a34'};
trainedClassifier.ClassificationKernel = classificationKernel;
trainedClassifier.About = 'This struct is a trained model exported from Classification Learner R2022b.';
trainedClassifier.HowToPredict = sprintf('To make predictions on a new table, T, use: \n yfit = c.predictFcn(T) \nreplacing ''c'' with the name of the variable that is this struct, e.g. ''trainedModel''. \n \nThe table, T, must contain the variables returned by: \n c.RequiredVariables \nVariable formats (e.g. matrix/vector, datatype) must match the original training data. \nAdditional variables are ignored. \n \nFor more information, see How to predict using an exported model.');

% Extract predictors and response (again, for cross-validation: the full
% predictor set is re-extracted because feature selection must be redone
% inside each fold to avoid selection bias).
inputTable = trainingData;
predictorNames = {'a01', 'a02', 'a03', 'a04', 'a05', 'a06', 'a07', 'a08', 'a09', 'a10', 'a11', 'a12', 'a13', 'a14', 'a15', 'a16', 'a17', 'a18', 'a19', 'a20', 'a21', 'a22', 'a23', 'a24', 'a25', 'a26', 'a27', 'a28', 'a29', 'a30', 'a31', 'a32', 'a33', 'a34'};
predictors = inputTable(:, predictorNames);
response = inputTable.class;
isCategoricalPredictor = false(1, numel(predictorNames));

% Perform cross-validation (stratified 5-fold over the response classes)
KFolds = 5;
cvp = cvpartition(response, 'KFold', KFolds);
% Initialize the predictions to the proper sizes
validationPredictions = response;
numObservations = size(predictors, 1);
numClasses = 2;  % matches 'ClassNames', categorical({'b'; 'g'}) above
validationScores = NaN(numObservations, numClasses);
for fold = 1:KFolds
    trainingPredictors = predictors(cvp.training(fold), :);
    trainingResponse = response(cvp.training(fold), :);
    foldIsCategoricalPredictor = isCategoricalPredictor;

    % Feature Ranking and Selection
    % Replace Inf/-Inf values with NaN to prepare data for normalization
    trainingPredictors = standardizeMissing(trainingPredictors, {Inf, -Inf});
    % Normalize data for feature ranking
    predictorMatrix = normalize(trainingPredictors, "DataVariables", ~foldIsCategoricalPredictor);
    % Rank features using MRMR algorithm
    featureIndex = fscmrmr(...
        predictorMatrix, ...
        trainingResponse);
    numFeaturesToKeep = 10;
    includedPredictorNames = trainingPredictors.Properties.VariableNames(featureIndex(1:numFeaturesToKeep));
    trainingPredictors = trainingPredictors(:, includedPredictorNames);
    foldIsCategoricalPredictor = foldIsCategoricalPredictor(featureIndex(1:numFeaturesToKeep));

    % Train a classifier on this fold's training partition with the same
    % options as the final model.
    classificationKernel = fitckernel(...
        trainingPredictors, ...
        trainingResponse, ...
        'Learner', 'svm', ...
        'NumExpansionDimensions', 'auto', ...
        'Lambda', 'auto', ...
        'KernelScale', 'auto', ...
        'IterationLimit', 1000, ...
        'ClassNames', categorical({'b'; 'g'}));

    % Create the fold's predict function (feature selection + kernel model)
    featureSelectionFcn = @(x) x(:, includedPredictorNames);
    kernelPredictFcn = @(x) predict(classificationKernel, x);
    validationPredictFcn = @(x) kernelPredictFcn(featureSelectionFcn(x));

    % Compute validation predictions on the held-out partition
    validationPredictors = predictors(cvp.test(fold), :);
    [foldPredictions, foldScores] = validationPredictFcn(validationPredictors);

    % Store predictions in the original order
    validationPredictions(cvp.test(fold), :) = foldPredictions;
    validationScores(cvp.test(fold), :) = foldScores;
end

% Compute validation accuracy, ignoring observations with a missing response
correctPredictions = (validationPredictions == response);
isMissing = ismissing(response);
correctPredictions = correctPredictions(~isMissing);
validationAccuracy = sum(correctPredictions)/length(correctPredictions);