#UNet++(SA)–MFO Framework for Early-Stage AD Prediction # INPUT: # - X_MRI: sMRI images (n_samples × H_MRI × W_MRI × C_MRI) # - X_PET: PET images (n_samples × H_PET × W_PET × C_PET) # - X_NP: Neuropsychological test scores (n_samples × 10 features) # - y: Labels {0: AD, 1: MCI, 2: CN} # # OUTPUT: # - y_pred: Predicted class labels # - confidence_scores: Prediction probabilities # ============================================================================ # STAGE 1: DATA PREPROCESSING FUNCTION Preprocess_Images(X_raw, modality): INPUT: X_raw (raw images), modality ∈ {MRI, PET} OUTPUT: X_preprocessed FOR each image x in X_raw DO: # Skull stripping x ← SkullStrip(x) # Intensity correction and normalization x ← IntensityCorrection(x) x ← Normalize(x, mean=0, std=1) # Registration and alignment to standard template x ← RegisterToAtlas(x, atlas_template) # Smoothing and denoising (Gaussian filter) x ← GaussianSmooth(x, sigma=1.0) # ROI extraction (focus on relevant brain regions) x ← ExtractROI(x) # Resize to standard dimensions x ← Resize(x, target_size=(128, 128, 1)) END FOR RETURN X_preprocessed END FUNCTION FUNCTION Preprocess_Neuropsych(X_NP_raw): INPUT: X_NP_raw (raw neuropsychological scores, n × 10) OUTPUT: X_NP_normalized # Handle missing values (mean imputation on training set) FOR each feature f in X_NP_raw DO: IF missing_values_exist(f) THEN: f ← Impute(f, strategy='mean', reference=training_set) END IF END FOR # Z-score normalization FOR each feature f in X_NP_raw DO: μ ← Mean(f, training_set) σ ← StandardDeviation(f, training_set) f ← (f - μ) / σ END FOR RETURN X_NP_normalized END FUNCTION # STAGE 2: ATTENTION GATE MODULE FUNCTION AttentionGate(X_encoder, X_decoder_upsampled): INPUT: X_encoder: Feature map from encoder path (X_{i,0}) X_decoder_upsampled: Upsampled feature from decoder (X_{i+1,0}) OUTPUT: X_refined: Attention-weighted encoder features # Align encoder and decoder features to same dimensional space E ← Conv2D(X_encoder, W_e) # Equation (2) D 
← Conv2D(X_decoder_upsampled, W_u) # Equation (3) # Compute attention coefficients α ← Sigmoid(Conv2D(E + D, W_α) + b_α) # Equation (4) # Apply attention weights to encoder features X_refined ← α ⊙ X_encoder # Equation (5), ⊙ = element-wise mult. RETURN X_refined END FUNCTION # STAGE 3: UNet++ WITH SELF-ATTENTION ARCHITECTURE FUNCTION UNetPP_SA(X_input, hyperparameters): INPUT: X_input: Preprocessed image (128 × 128 × 1) hyperparameters: {learning_rate, n_filters, dropout_rate, batch_size, n_attention_heads, attention_scaling} OUTPUT: F_extracted: Extracted features # -------------------- ENCODER PATH -------------------- # Level 1 X_1_0 ← Conv2D(X_input, filters=64, kernel=3×3) X_1_0 ← BatchNorm(X_1_0) X_1_0 ← ReLU(X_1_0) # Level 2 X_2_0 ← MaxPooling(X_1_0, pool_size=2×2) X_2_0 ← Conv2D(X_2_0, filters=128, kernel=3×3) X_2_0 ← ReLU(X_2_0) # Level 3 X_3_0 ← MaxPooling(X_2_0, pool_size=2×2) X_3_0 ← Conv2D(X_3_0, filters=256, kernel=3×3) X_3_0 ← ReLU(X_3_0) # Level 4 X_4_0 ← MaxPooling(X_3_0, pool_size=2×2) X_4_0 ← Conv2D(X_4_0, filters=512, kernel=3×3) X_4_0 ← ReLU(X_4_0) # Bottleneck (Level 5) X_5_0 ← MaxPooling(X_4_0, pool_size=2×2) X_5_0 ← Conv2D(X_5_0, filters=1024, kernel=3×3) X_5_0 ← ReLU(X_5_0) # -------------------- DECODER PATH WITH ATTENTION -------------------- # Level 4 decoder X_4_1_up ← Upsample(X_5_0, scale=2×2) X_4_0_att ← AttentionGate(X_4_0, X_4_1_up) # Apply attention X_4_1 ← Concatenate([X_4_1_up, X_4_0_att]) X_4_1 ← Conv2D(X_4_1, filters=512, kernel=3×3) X_4_1 ← ReLU(X_4_1) # Level 3 decoder X_3_1_up ← Upsample(X_4_1, scale=2×2) X_3_0_att ← AttentionGate(X_3_0, X_3_1_up) X_3_1 ← Concatenate([X_3_1_up, X_3_0_att]) X_3_1 ← Conv2D(X_3_1, filters=256, kernel=3×3) X_3_1 ← ReLU(X_3_1) # Level 2 decoder X_2_1_up ← Upsample(X_3_1, scale=2×2) X_2_0_att ← AttentionGate(X_2_0, X_2_1_up) X_2_1 ← Concatenate([X_2_1_up, X_2_0_att]) X_2_1 ← Conv2D(X_2_1, filters=128, kernel=3×3) X_2_1 ← ReLU(X_2_1) # Level 1 decoder X_1_1_up ← Upsample(X_2_1, scale=2×2) 
    X_1_0_att ← AttentionGate(X_1_0, X_1_1_up)
    X_1_1 ← Concatenate([X_1_1_up, X_1_0_att])
    X_1_1 ← Conv2D(X_1_1, filters=64, kernel=3×3)
    X_1_1 ← ReLU(X_1_1)

    # Dropout for regularization
    X_1_1 ← Dropout(X_1_1, rate=dropout_rate)

    F_extracted ← GlobalAveragePooling(X_1_1)
    RETURN F_extracted
END FUNCTION

FUNCTION DenseNet_Neuropsych(X_NP):
    INPUT: X_NP (neuropsychological features, size 10)
    OUTPUT: F_NP (encoded features, size 64)

    F_NP ← Dense(X_NP, units=128, activation='relu')
    F_NP ← Dropout(F_NP, rate=0.3)
    F_NP ← Dense(F_NP, units=64, activation='relu')

    # Reshape to match image feature dimensions for concatenation
    # NOTE(review): a 64-unit vector cannot be reshaped to 128×128×10
    # (163,840 elements); presumably the encoded features are tiled/broadcast
    # spatially to match the image feature maps — confirm against the
    # implementation.
    F_NP ← Reshape(F_NP, target_shape=(128, 128, 10))

    RETURN F_NP
END FUNCTION

# STAGE 4: MOTH FLAME OPTIMIZATION (MFO)
FUNCTION MothFlameOptimization(UNetPP_SA, X_train, y_train, X_val, y_val):
    INPUT:
        UNetPP_SA: Model architecture
        X_train, y_train: Training data
        X_val, y_val: Validation data
    OUTPUT:
        H_best: Optimal hyperparameters

    # MFO hyperparameters
    N ← 30   # Number of moths (population size)
    T ← 50   # Maximum iterations
    b ← 1    # Spiral constant

    # Hyperparameter search space
    hyperparameter_space ← {
        learning_rate: [1e-5, 1e-2],
        n_filters: [32, 128],
        dropout_rate: [0.1, 0.5],
        batch_size: [8, 32],
        n_attention_heads: [2, 8],
        attention_scaling: [0.5, 2.0]
    }

    # Initialize moths (candidate solutions)
    FOR i = 1 TO N DO:
        M_i ← RandomInitialize(hyperparameter_space)
    END FOR

    convergence_count ← 0   # counter used by the convergence check below

    # Main MFO loop
    FOR t = 1 TO T DO:
        # Step 3: Evaluate fitness for each moth
        FOR i = 1 TO N DO:
            # Train UNet++ with current hyperparameters for few epochs
            model ← Train(UNetPP_SA, X_train, y_train, hyperparameters=M_i, epochs=5)
            # Evaluate on validation set
            accuracy_i ← Evaluate(model, X_val, y_val)
            # Fitness function (Equation 7): minimize negative accuracy
            Fitness(M_i) ← -accuracy_i
        END FOR

        # Sort moths by fitness and select top k as flames
        flames ← SortByFitness(M_1, ..., M_N)

        # Step 4: Flame reduction mechanism (Equation 8)
        num_flames ← Round(N - t × (N - 1) / T)
        F ← flames[1:num_flames]   # Keep only
top flames

        # Step 2: Update moth positions using logarithmic spiral
        FOR i = 1 TO N DO:
            # Assign flame to moth (closest or random)
            j ← AssignFlame(i, num_flames)
            P_j ← F[j]

            # Calculate Euclidean distance
            D ← EuclideanDistance(M_i, P_j)

            # Generate random number r ∈ [-1, 1] (spiral parameter of MFO)
            r ← Random(-1, 1)

            # Update moth position (Equation 6) — logarithmic spiral:
            # S(M_i, P_j) = D · e^(b·r) · cos(2π·r) + P_j
            M_i^(t+1) ← D × exp(b×r) × cos(2π×r) + P_j

            # Ensure hyperparameters stay within bounds
            M_i^(t+1) ← ClipToBounds(M_i^(t+1), hyperparameter_space)
        END FOR

        # Check convergence
        IF |Fitness(F_best^t) - Fitness(F_best^(t-1))| < 0.001 THEN:
            convergence_count ← convergence_count + 1
            IF convergence_count ≥ 5 THEN:
                BREAK   # Converged
            END IF
        END IF
    END FOR

    # Step 5: Return best solution
    H_best ← F[1]   # Best flame = optimal hyperparameters
    RETURN H_best
END FUNCTION

# STAGE 5: FEATURE FUSION
FUNCTION FeatureFusion(F_MRI, F_PET, F_Neuro):
    INPUT:
        F_MRI: Features from MRI (size: 128×128×64)
        F_PET: Features from PET (size: 128×128×64)
        F_Neuro: Features from neuropsych (size: 128×128×10)
    OUTPUT:
        F_fused: Concatenated multimodal features

    # Concatenate along feature dimension
    F_fused ← Concatenate([F_MRI, F_PET, F_Neuro], axis=-1)
    # Resulting size: 128×128×(64+64+10) = 128×128×138

    # Flatten for classification
    F_fused ← Flatten(F_fused)

    RETURN F_fused
END FUNCTION

# STAGE 6: BASE CLASSIFIERS
FUNCTION Train_SVM(F_fused, y):
    INPUT: F_fused (fused features), y (labels)
    OUTPUT: SVM_model

    # Train one-vs-rest SVM with RBF kernel
    FOR k in {0, 1, 2} DO:   # AD, MCI, CN
        y_binary ← (y == k)   # Binary labels for class k
        SVM_k ← Train_SVM_Binary(F_fused, y_binary, kernel='rbf', C=1.0, gamma='scale')
    END FOR

    # Decision function (Equation 10, 11)
    FUNCTION Predict(F_new):
        FOR k in {0, 1, 2} DO:
            f_k(F_new) ← w_k^T × Φ(F_new) + b_k
        END FOR
        ŷ ← argmax_k f_k(F_new)
        RETURN ŷ
    END FUNCTION

    RETURN SVM_model
END FUNCTION

FUNCTION Train_kNN(F_fused, y):
    INPUT: F_fused, y
    OUTPUT: kNN_model

    k ← 5   # Number of neighbors
    distance_metric ← 'euclidean'

    # Store training data (lazy learning)
    kNN_model.training_data ←
F_fused kNN_model.training_labels ← y FUNCTION Predict(F_new): # Compute distances to all training samples (Equation 9) FOR i = 1 TO n_train DO: dist_i ← sqrt(Σ(F_new_j - F_fused_i_j)²) END FOR # Find k nearest neighbors neighbors ← GetTopK(distances, k=5) # Majority voting (Equation 12) ŷ ← Mode(y[neighbors]) RETURN ŷ END FUNCTION RETURN kNN_model END FUNCTION FUNCTION Train_RandomForest(F_fused, y): INPUT: F_fused, y OUTPUT: RF_model n_estimators ← 1000 max_depth ← None min_samples_split ← 2 # Train ensemble of decision trees FOR t = 1 TO n_estimators DO: # Bootstrap sample F_bootstrap, y_bootstrap ← BootstrapSample(F_fused, y) # Train decision tree tree_t ← TrainDecisionTree(F_bootstrap, y_bootstrap, max_depth=max_depth, min_samples_split=min_samples_split) RF_model.trees[t] ← tree_t END FOR FUNCTION Predict(F_new): # Collect predictions from all trees FOR t = 1 TO n_estimators DO: y_t ← tree_t.Predict(F_new) END FOR # Majority voting (Equation 12) ŷ ← Mode(y_1, y_2, ..., y_n_estimators) RETURN ŷ END FUNCTION RETURN RF_model END FUNCTION # STAGE 7: WEIGHTED STACKING ENSEMBLE FUNCTION WeightedStackingEnsemble(models, F_val, y_val): INPUT: models: {SVM_model, kNN_model, RF_model} F_val, y_val: Validation set for weight optimization OUTPUT: ensemble_model with optimized weights # Grid search for optimal weights (Section 3.6.6.1) weight_range ← [0.1, 0.2, ..., 0.9] best_accuracy ← 0 best_weights ← None FOR w_SVM in weight_range DO: FOR w_kNN in weight_range DO: FOR w_RF in weight_range DO: IF w_SVM + w_kNN + w_RF == 1.0 THEN: # Test current weights accuracy ← EvaluateWeights(models, F_val, y_val, [w_SVM, w_kNN, w_RF]) IF accuracy > best_accuracy THEN: best_accuracy ← accuracy best_weights ← [w_SVM, w_kNN, w_RF] END IF END IF END FOR END FOR END FOR # Store optimal weights ensemble_model.weights ← best_weights ensemble_model.models ← models FUNCTION Predict(F_new): # Get predictions from base classifiers ŷ_SVM ← SVM_model.Predict(F_new) ŷ_kNN ← 
kNN_model.Predict(F_new) ŷ_RF ← RF_model.Predict(F_new) # Get probability distributions (calibrated) p_SVM ← SVM_model.PredictProba(F_new) # Size: 3 (AD, MCI, CN) p_kNN ← kNN_model.PredictProba(F_new) p_RF ← RF_model.PredictProba(F_new) # Weighted averaging (Equation 13) p_ensemble ← w_SVM × p_SVM + w_kNN × p_kNN + w_RF × p_RF # Final prediction ŷ_final ← argmax(p_ensemble) RETURN ŷ_final, p_ensemble END FUNCTION RETURN ensemble_model END FUNCTION # STAGE 8: HANDLING MISSING MODALITIES FUNCTION HandleMissingModalities(X_MRI, X_PET, X_NP, available_modalities): INPUT: X_MRI, X_PET, X_NP: Input data (may contain None values) available_modalities: Boolean flags {has_MRI, has_PET, has_NP} OUTPUT: F_fused: Fused features with missing modality handling # Zero-filling strategy for missing modalities IF NOT available_modalities['MRI'] THEN: F_MRI ← Zeros(shape=(128, 128, 64)) ELSE: F_MRI ← UNetPP_SA(X_MRI, H_optimal) END IF IF NOT available_modalities['PET'] THEN: F_PET ← Zeros(shape=(128, 128, 64)) ELSE: F_PET ← UNetPP_SA(X_PET, H_optimal) END IF IF NOT available_modalities['NP'] THEN: F_Neuro ← Zeros(shape=(128, 128, 10)) ELSE: F_Neuro ← DenseNet_Neuropsych(X_NP) END IF # Optionally: Mean imputation from training set # (Tested in ablation studies, causes 2-3% accuracy drop) # Create modality presence mask mask ← [available_modalities['MRI'], available_modalities['PET'], available_modalities['NP']] # Concatenate with mask F_fused ← Concatenate([F_MRI, F_PET, F_Neuro, mask]) # Adjust ensemble weights based on available modalities IF NOT available_modalities['PET'] THEN: # Increase weight for MRI-based predictions w_SVM ← w_SVM × 1.2 w_RF ← w_RF × 1.1 # Renormalize weights ← [w_SVM, w_kNN, w_RF] / sum([w_SVM, w_kNN, w_RF]) END IF RETURN F_fused END FUNCTION # MAIN TRAINING PIPELINE FUNCTION Train_UNetPP_SA_MFO_Framework(): # Load and split data X_MRI, X_PET, X_NP, y ← LoadADNIDataset() # Stratified split (70% train, 15% val, 15% test) X_train, y_train ← StratifiedSplit(X, 
y, ratio=0.70) X_val, y_val ← StratifiedSplit(X_remaining, y_remaining, ratio=0.50) X_test, y_test ← X_remaining, y_remaining # Stage 1: Preprocessing X_MRI_train ← Preprocess_Images(X_MRI_train, 'MRI') X_PET_train ← Preprocess_Images(X_PET_train, 'PET') X_NP_train ← Preprocess_Neuropsych(X_NP_train) # Similarly for validation and test sets X_MRI_val ← Preprocess_Images(X_MRI_val, 'MRI') X_PET_val ← Preprocess_Images(X_PET_val, 'PET') X_NP_val ← Preprocess_Neuropsych(X_NP_val) # Stage 3: Hyperparameter Optimization via MFO H_optimal ← MothFlameOptimization(UNetPP_SA, X_train, y_train, X_val, y_val) PRINT "Optimal hyperparameters:", H_optimal # Stage 2: Feature Extraction with Optimized UNet++(SA) # Initialize optimized model UNetPP_SA_optimized ← BuildModel(UNetPP_SA, H_optimal) # Apply 5-fold cross-validation with regularization FOR fold = 1 TO 5 DO: X_fold_train, y_fold_train ← GetFold(X_train, y_train, fold) X_fold_val, y_fold_val ← GetValidationFold(X_train, y_train, fold) # Data augmentation X_MRI_aug ← AugmentData(X_MRI_fold_train, rotation_range=10, flip=True, intensity_scaling=0.1) X_PET_aug ← AugmentData(X_PET_fold_train, rotation_range=10, flip=True, intensity_scaling=0.1) # Extract features from each modality FOR each sample in X_fold_train DO: F_MRI ← UNetPP_SA_optimized(X_MRI_sample) F_PET ← UNetPP_SA_optimized(X_PET_sample) F_Neuro ← DenseNet_Neuropsych(X_NP_sample) # Stage 4: Feature Fusion F_fused_sample ← FeatureFusion(F_MRI, F_PET, F_Neuro) F_fused_train.append(F_fused_sample) END FOR # Regularization: Dropout (rate=0.3) and L2 (lambda=0.01) # Already applied in UNetPP_SA architecture # Evaluate fold performance fold_accuracy ← Evaluate(UNetPP_SA_optimized, X_fold_val, y_fold_val) PRINT "Fold", fold, "Accuracy:", fold_accuracy END FOR # Final training on full training set FOR each sample in X_train DO: F_MRI ← UNetPP_SA_optimized(X_MRI_sample) F_PET ← UNetPP_SA_optimized(X_PET_sample) F_Neuro ← DenseNet_Neuropsych(X_NP_sample) F_fused_sample ← 
FeatureFusion(F_MRI, F_PET, F_Neuro)
        F_fused_train.append(F_fused_sample)
    END FOR

    # Extract features for validation set
    FOR each sample in X_val DO:
        F_MRI ← UNetPP_SA_optimized(X_MRI_sample)
        F_PET ← UNetPP_SA_optimized(X_PET_sample)
        F_Neuro ← DenseNet_Neuropsych(X_NP_sample)
        F_fused_sample ← FeatureFusion(F_MRI, F_PET, F_Neuro)
        F_fused_val.append(F_fused_sample)
    END FOR

    # Stage 5: Train Base Classifiers
    SVM_model ← Train_SVM(F_fused_train, y_train)
    kNN_model ← Train_kNN(F_fused_train, y_train)
    RF_model ← Train_RandomForest(F_fused_train, y_train)

    # Stage 6: Weighted Stacking Ensemble
    models ← {SVM_model, kNN_model, RF_model}
    ensemble_model ← WeightedStackingEnsemble(models, F_fused_val, y_val)
    PRINT "Optimal ensemble weights:", ensemble_model.weights

    RETURN UNetPP_SA_optimized, ensemble_model
END FUNCTION

# INFERENCE PIPELINE
FUNCTION Predict_AD(X_MRI_new, X_PET_new, X_NP_new, UNetPP_SA_model, ensemble_model):
    INPUT:
        X_MRI_new, X_PET_new, X_NP_new: New patient data
        UNetPP_SA_model: Trained feature extractor
        ensemble_model: Trained ensemble classifier
    OUTPUT:
        y_pred: Predicted class {0: AD, 1: MCI, 2: CN}
        confidence: Prediction probabilities

    # Preprocessing
    # NOTE(review): the missing-modality check below tests the raw inputs for
    # None, so preprocessing must be skipped (or guarded) for any modality that
    # is absent — as written, Preprocess_* would be applied to a None input.
    # Confirm the intended order against the implementation.
    X_MRI_prep ← Preprocess_Images(X_MRI_new, 'MRI')
    X_PET_prep ← Preprocess_Images(X_PET_new, 'PET')
    X_NP_prep ← Preprocess_Neuropsych(X_NP_new)

    # Check for missing modalities
    available ← {
        'MRI': X_MRI_new is not None,
        'PET': X_PET_new is not None,
        'NP': X_NP_new is not None
    }

    # Feature extraction with missing modality handling
    F_fused ← HandleMissingModalities(X_MRI_prep, X_PET_prep, X_NP_prep, available)

    # Ensemble prediction
    y_pred, confidence ← ensemble_model.Predict(F_fused)

    # Map to class labels
    class_names ← {0: "Alzheimer's Disease",
                   1: "Mild Cognitive Impairment",
                   2: "Cognitively Normal"}
    PRINT "Prediction:", class_names[y_pred]
    PRINT "Confidence: AD={:.2f}%, MCI={:.2f}%, CN={:.2f}%".format(
        confidence[0]*100, confidence[1]*100, confidence[2]*100)

    RETURN y_pred, confidence
END FUNCTION

# EVALUATION METRICS
FUNCTION EvaluateModel(y_true, y_pred, y_proba): INPUT: y_true: Ground truth labels y_pred: Predicted labels y_proba: Prediction probabilities OUTPUT: metrics: Dictionary of evaluation metrics # Confusion matrix CM ← ConfusionMatrix(y_true, y_pred) # Per-class metrics FOR class k in {0, 1, 2} DO: TP_k ← CM[k, k] FP_k ← sum(CM[:, k]) - TP_k FN_k ← sum(CM[k, :]) - TP_k TN_k ← sum(CM) - TP_k - FP_k - FN_k Precision_k ← TP_k / (TP_k + FP_k) Recall_k ← TP_k / (TP_k + FN_k) F1_k ← 2 × (Precision_k × Recall_k) / (Precision_k + Recall_k) Specificity_k ← TN_k / (TN_k + FP_k) END FOR # Overall metrics Accuracy ← sum(diagonal(CM)) / sum(CM) Precision_macro ← Mean(Precision_0, Precision_1, Precision_2) Recall_macro ← Mean(Recall_0, Recall_1, Recall_2) F1_macro ← Mean(F1_0, F1_1, F1_2) # AUC-ROC (One-vs-Rest strategy) FOR class k in {0, 1, 2} DO: # Binary classification: class k vs. rest y_binary ← (y_true == k) AUC_k ← ComputeAUC(y_binary, y_proba[:, k]) END FOR AUC_ROC_macro ← Mean(AUC_0, AUC_1, AUC_2) metrics ← { 'Accuracy': Accuracy, 'Precision': Precision_macro, 'Recall': Recall_macro, 'F1-Score': F1_macro, 'AUC-ROC': AUC_ROC_macro } RETURN metrics END FUNCTION # MAIN EXECUTION PROCEDURE Main(): PRINT "=== UNet++(SA)–MFO Framework for AD Prediction ===" # Training phase PRINT "\n[1] Training phase..." UNetPP_SA_model, ensemble_model ← Train_UNetPP_SA_MFO_Framework() # Load test data X_MRI_test, X_PET_test, X_NP_test, y_test ← LoadTestData() # Extract features for test set F_fused_test ← [] FOR each sample in X_test DO: F_MRI ← UNetPP_SA_model(X_MRI_sample) F_PET ← UNetPP_SA_model(X_PET_sample) F_Neuro ← DenseNet_Neuropsych(X_NP_sample) F_fused_sample ← FeatureFusion(F_MRI, F_PET, F_Neuro) F_fused_test.append(F_fused_sample) END FOR # Prediction on test set PRINT "\n[2] Evaluation on test set..." 
y_pred, y_proba ← ensemble_model.Predict(F_fused_test) # Evaluate performance metrics ← EvaluateModel(y_test, y_pred, y_proba) PRINT "\n=== Final Results ===" PRINT "Accuracy: ", metrics['Accuracy'], "%" PRINT "Precision: ", metrics['Precision'], "%" PRINT "Recall: ", metrics['Recall'], "%" PRINT "F1-Score: ", metrics['F1-Score'], "%" PRINT "AUC-ROC: ", metrics['AUC-ROC'], "%" END PROCEDURE END ALGORITHM