function [trainedClassifier, validationAccuracy] = trainClassifier(trainingData)
% trainClassifier  Train a 1-nearest-neighbor classifier with MRMR feature
% selection and report its 5-fold cross-validation accuracy.
%
% Input:
%   trainingData: table with numeric predictor columns V1..V128 and a
%       numeric response column Class with labels 1..6.
%
% Output:
%   trainedClassifier: struct containing the trained model. Use
%       trainedClassifier.predictFcn(T2) to predict on a new table T2 that
%       contains at least the same predictor columns as used in training;
%       see trainedClassifier.HowToPredict for details.
%   validationAccuracy: fraction of correctly classified observations
%       under 5-fold cross-validation (observations with NaN response are
%       excluded from the accuracy computation).
%
% Originally auto-generated by MATLAB Classification Learner (R2022b) on
% 21-Nov-2023; reformatted, documented, and cleaned up by hand.

% --- Extract predictors and response -----------------------------------
inputTable = trainingData;
% Predictor names V1..V128, built programmatically instead of a 128-entry
% hand-written literal list (same 1x128 cell array of char vectors).
predictorNames = arrayfun(@(k) sprintf('V%d', k), 1:128, 'UniformOutput', false);
predictors = inputTable(:, predictorNames);
response = inputTable.Class;
isCategoricalPredictor = false(1, 128);   % all 128 predictors are numeric

% --- Feature ranking and selection -------------------------------------
% Replace Inf/-Inf with NaN so normalization and ranking treat them as missing.
predictors = standardizeMissing(predictors, {Inf, -Inf});
% Normalize the non-categorical predictors before ranking. NOTE: the exact
% parameter name is 'DataVariables'; the generated code's 'DataVariable'
% only worked via MATLAB's partial name matching.
predictorMatrix = normalize(predictors, 'DataVariables', ~isCategoricalPredictor);
% Rank features with the Minimum Redundancy Maximum Relevance algorithm.
featureIndex = fscmrmr(predictorMatrix, response);
numFeaturesToKeep = 100;   % keep the 100 best-ranked of the 128 features
includedPredictorNames = predictors.Properties.VariableNames(featureIndex(1:numFeaturesToKeep));
predictors = predictors(:, includedPredictorNames);
isCategoricalPredictor = isCategoricalPredictor(featureIndex(1:numFeaturesToKeep));

% --- Train the final classifier on all of the data ---------------------
classificationKNN = fitcknn( ...
    predictors, ...
    response, ...
    'Distance', 'Euclidean', ...
    'Exponent', [], ...
    'NumNeighbors', 1, ...
    'DistanceWeight', 'Equal', ...
    'Standardize', true, ...
    'ClassNames', [1; 2; 3; 4; 5; 6]);

% --- Build the result struct with a predict function -------------------
predictorExtractionFcn = @(t) t(:, predictorNames);
featureSelectionFcn = @(x) x(:, includedPredictorNames);
knnPredictFcn = @(x) predict(classificationKNN, x);
trainedClassifier.predictFcn = @(x) knnPredictFcn(featureSelectionFcn(predictorExtractionFcn(x)));

% The app exports RequiredVariables in alphabetical (char) order, which is
% exactly sort() of the V1..V128 names.
trainedClassifier.RequiredVariables = sort(predictorNames);
trainedClassifier.ClassificationKNN = classificationKNN;
trainedClassifier.About = 'This struct is a trained model exported from Classification Learner R2022b.';
trainedClassifier.HowToPredict = sprintf('To make predictions on a new table, T, use: \n yfit = c.predictFcn(T) \nreplacing ''c'' with the name of the variable that is this struct, e.g. ''trainedModel''. \n \nThe table, T, must contain the variables returned by: \n c.RequiredVariables \nVariable formats (e.g. matrix/vector, datatype) must match the original training data. \nAdditional variables are ignored. \n \nFor more information, see How to predict using an exported model.');

% --- Cross-validation --------------------------------------------------
% Re-extract the FULL predictor set (the variables above were narrowed to
% the 100 selected features). Feature selection is redone inside each fold
% on that fold's training data only, so the validation estimate does not
% leak information across folds.
inputTable = trainingData;
predictors = inputTable(:, predictorNames);
response = inputTable.Class;
isCategoricalPredictor = false(1, 128);

KFolds = 5;
cvp = cvpartition(response, 'KFold', KFolds);
% Initialize the predictions/scores to the proper sizes.
validationPredictions = response;
numObservations = size(predictors, 1);
numClasses = 6;
validationScores = NaN(numObservations, numClasses);
for fold = 1:KFolds
    trainingPredictors = predictors(cvp.training(fold), :);
    trainingResponse = response(cvp.training(fold), :);
    foldIsCategoricalPredictor = isCategoricalPredictor;

    % Per-fold feature ranking and selection (training data only).
    trainingPredictors = standardizeMissing(trainingPredictors, {Inf, -Inf});
    predictorMatrix = normalize(trainingPredictors, 'DataVariables', ~foldIsCategoricalPredictor);
    featureIndex = fscmrmr(predictorMatrix, trainingResponse);
    numFeaturesToKeep = 100;
    includedPredictorNames = trainingPredictors.Properties.VariableNames(featureIndex(1:numFeaturesToKeep));
    trainingPredictors = trainingPredictors(:, includedPredictorNames);
    foldIsCategoricalPredictor = foldIsCategoricalPredictor(featureIndex(1:numFeaturesToKeep));

    % Train the fold classifier with the same options as the final model.
    classificationKNN = fitcknn( ...
        trainingPredictors, ...
        trainingResponse, ...
        'Distance', 'Euclidean', ...
        'Exponent', [], ...
        'NumNeighbors', 1, ...
        'DistanceWeight', 'Equal', ...
        'Standardize', true, ...
        'ClassNames', [1; 2; 3; 4; 5; 6]);

    featureSelectionFcn = @(x) x(:, includedPredictorNames);
    knnPredictFcn = @(x) predict(classificationKNN, x);
    validationPredictFcn = @(x) knnPredictFcn(featureSelectionFcn(x));

    % Predict the held-out observations and store them in original order.
    validationPredictors = predictors(cvp.test(fold), :);
    [foldPredictions, foldScores] = validationPredictFcn(validationPredictors);
    validationPredictions(cvp.test(fold), :) = foldPredictions;
    validationScores(cvp.test(fold), :) = foldScores;
end

% --- Compute validation accuracy (ignoring missing responses) ----------
correctPredictions = (validationPredictions == response);
isMissing = isnan(response);
correctPredictions = correctPredictions(~isMissing);
validationAccuracy = sum(correctPredictions)/length(correctPredictions);
end