#UNet++(SA)–MFO Framework for Early-Stage AD Prediction # INPUT: # - X_MRI: sMRI images (n_samples × H_MRI × W_MRI × C_MRI) # - X_PET: PET images (n_samples × H_PET × W_PET × C_PET) # - X_NP: Neuropsychological test scores (n_samples × 10 features) # - y: Labels {0: AD, 1: MCI, 2: CN} # # OUTPUT: # - y_pred: Predicted class labels # - confidence_scores: Prediction probabilities # ============================================================================ # STAGE 1: DATA PREPROCESSING FUNCTION Preprocess_Images(X_raw, modality): INPUT: X_raw (raw images), modality ∈ {MRI, PET} OUTPUT: X_preprocessed FOR each image x in X_raw DO: # Skull stripping x ← SkullStrip(x) # Intensity correction and normalization x ← IntensityCorrection(x) x ← Normalize(x, mean=0, std=1) # Registration and alignment to standard template x ← RegisterToAtlas(x, atlas_template) # Smoothing and denoising (Gaussian filter) x ← GaussianSmooth(x, sigma=1.0) # ROI extraction (focus on relevant brain regions) x ← ExtractROI(x) # Resize to standard dimensions x ← Resize(x, target_size=(128, 128, 1)) END FOR RETURN X_preprocessed END FUNCTION FUNCTION Preprocess_Neuropsych(X_NP_raw): INPUT: X_NP_raw (raw neuropsychological scores, n × 10) OUTPUT: X_NP_normalized # Handle missing values (mean imputation on training set) FOR each feature f in X_NP_raw DO: IF missing_values_exist(f) THEN: f ← Impute(f, strategy='mean', reference=training_set) END IF END FOR # Z-score normalization FOR each feature f in X_NP_raw DO: μ ← Mean(f, training_set) σ ← StandardDeviation(f, training_set) f ← (f - μ) / σ END FOR RETURN X_NP_normalized END FUNCTION # STAGE 2: ATTENTION GATE MODULE FUNCTION AttentionGate(X_encoder, X_decoder_upsampled): INPUT: X_encoder: Feature map from encoder path (X_{i,0}) X_decoder_upsampled: Upsampled feature from decoder (X_{i+1,0}) OUTPUT: X_refined: Attention-weighted encoder features # Align encoder and decoder features to same dimensional space E ← Conv2D(X_encoder, W_e) # Equation (2) D 
← Conv2D(X_decoder_upsampled, W_u) # Equation (3) # Compute attention coefficients α ← Sigmoid(Conv2D(E + D, W_α) + b_α) # Equation (4) # Apply attention weights to encoder features X_refined ← α ⊙ X_encoder # Equation (5), ⊙ = element-wise mult. RETURN X_refined END FUNCTION # STAGE 3: UNet++ WITH SELF-ATTENTION ARCHITECTURE FUNCTION UNetPP_SA(X_input, hyperparameters): INPUT: X_input: Preprocessed image (128 × 128 × 1) hyperparameters: {learning_rate, n_filters, dropout_rate, batch_size, n_attention_heads, attention_scaling} OUTPUT: F_extracted: Extracted features # -------------------- ENCODER PATH -------------------- # Level 1 X_1_0 ← Conv2D(X_input, filters=64, kernel=3×3) X_1_0 ← BatchNorm(X_1_0) X_1_0 ← ReLU(X_1_0) # Level 2 X_2_0 ← MaxPooling(X_1_0, pool_size=2×2) X_2_0 ← Conv2D(X_2_0, filters=128, kernel=3×3) X_2_0 ← ReLU(X_2_0) # Level 3 X_3_0 ← MaxPooling(X_2_0, pool_size=2×2) X_3_0 ← Conv2D(X_3_0, filters=256, kernel=3×3) X_3_0 ← ReLU(X_3_0) # Level 4 X_4_0 ← MaxPooling(X_3_0, pool_size=2×2) X_4_0 ← Conv2D(X_4_0, filters=512, kernel=3×3) X_4_0 ← ReLU(X_4_0) # Bottleneck (Level 5) X_5_0 ← MaxPooling(X_4_0, pool_size=2×2) X_5_0 ← Conv2D(X_5_0, filters=1024, kernel=3×3) X_5_0 ← ReLU(X_5_0) # -------------------- DECODER PATH WITH ATTENTION -------------------- # Level 4 decoder X_4_1_up ← Upsample(X_5_0, scale=2×2) X_4_0_att ← AttentionGate(X_4_0, X_4_1_up) # Apply attention X_4_1 ← Concatenate([X_4_1_up, X_4_0_att]) X_4_1 ← Conv2D(X_4_1, filters=512, kernel=3×3) X_4_1 ← ReLU(X_4_1) # Level 3 decoder X_3_1_up ← Upsample(X_4_1, scale=2×2) X_3_0_att ← AttentionGate(X_3_0, X_3_1_up) X_3_1 ← Concatenate([X_3_1_up, X_3_0_att]) X_3_1 ← Conv2D(X_3_1, filters=256, kernel=3×3) X_3_1 ← ReLU(X_3_1) # Level 2 decoder X_2_1_up ← Upsample(X_3_1, scale=2×2) X_2_0_att ← AttentionGate(X_2_0, X_2_1_up) X_2_1 ← Concatenate([X_2_1_up, X_2_0_att]) X_2_1 ← Conv2D(X_2_1, filters=128, kernel=3×3) X_2_1 ← ReLU(X_2_1) # Level 1 decoder X_1_1_up ← Upsample(X_2_1, scale=2×2) 
    X_1_0_att ← AttentionGate(X_1_0, X_1_1_up)
    X_1_1 ← Concatenate([X_1_1_up, X_1_0_att])
    X_1_1 ← Conv2D(X_1_1, filters=64, kernel=3×3)
    X_1_1 ← ReLU(X_1_1)

    # Dropout for regularization
    X_1_1 ← Dropout(X_1_1, rate=dropout_rate)

    F_extracted ← GlobalAveragePooling(X_1_1)
    RETURN F_extracted
END FUNCTION

FUNCTION DenseNet_Neuropsych(X_NP):
    INPUT: X_NP (neuropsychological features, size 10)
    OUTPUT: F_NP (encoded features, size 64)

    F_NP ← Dense(X_NP, units=128, activation='relu')
    F_NP ← Dropout(F_NP, rate=0.3)
    F_NP ← Dense(F_NP, units=64, activation='relu')

    # Reshape to match image feature dimensions for concatenation
    # NOTE(review): a 64-unit vector cannot be reshaped to 128×128×10
    # (163,840 elements); presumably the encoded features are tiled/broadcast
    # spatially to match the image feature maps — confirm against the
    # implementation.
    F_NP ← Reshape(F_NP, target_shape=(128, 128, 10))

    RETURN F_NP
END FUNCTION

# STAGE 4: MOTH FLAME OPTIMIZATION (MFO)
FUNCTION MothFlameOptimization(UNetPP_SA, X_train, y_train, X_val, y_val):
    INPUT:
        UNetPP_SA: Model architecture
        X_train, y_train: Training data
        X_val, y_val: Validation data
    OUTPUT:
        H_best: Optimal hyperparameters

    # MFO hyperparameters
    N ← 30   # Number of moths (population size)
    T ← 50   # Maximum iterations
    b ← 1    # Spiral constant

    # Hyperparameter search space
    hyperparameter_space ← {
        learning_rate: [1e-5, 1e-2],
        n_filters: [32, 128],
        dropout_rate: [0.1, 0.5],
        batch_size: [8, 32],
        n_attention_heads: [2, 8],
        attention_scaling: [0.5, 2.0]
    }

    # Initialize moths (candidate solutions)
    FOR i = 1 TO N DO:
        M_i ← RandomInitialize(hyperparameter_space)
    END FOR

    convergence_count ← 0   # counter used by the convergence check below

    # Main MFO loop
    FOR t = 1 TO T DO:
        # Step 3: Evaluate fitness for each moth
        FOR i = 1 TO N DO:
            # Train UNet++ with current hyperparameters for few epochs
            model ← Train(UNetPP_SA, X_train, y_train, hyperparameters=M_i, epochs=5)
            # Evaluate on validation set
            accuracy_i ← Evaluate(model, X_val, y_val)
            # Fitness function (Equation 7): minimize negative accuracy
            Fitness(M_i) ← -accuracy_i
        END FOR

        # Sort moths by fitness and select top k as flames
        flames ← SortByFitness(M_1, ..., M_N)

        # Step 4: Flame reduction mechanism (Equation 8)
        num_flames ← Round(N - t × (N - 1) / T)
        F ← flames[1:num_flames]   # Keep only
top flames

        # Step 2: Update moth positions using logarithmic spiral
        FOR i = 1 TO N DO:
            # Assign flame to moth (closest or random)
            j ← AssignFlame(i, num_flames)
            P_j ← F[j]

            # Calculate Euclidean distance
            D ← EuclideanDistance(M_i, P_j)

            # Generate random number r ∈ [-1, 1] (spiral parameter of MFO)
            r ← Random(-1, 1)

            # Update moth position (Equation 6) — logarithmic spiral:
            # S(M_i, P_j) = D · e^(b·r) · cos(2π·r) + P_j
            M_i^(t+1) ← D × exp(b×r) × cos(2π×r) + P_j

            # Ensure hyperparameters stay within bounds
            M_i^(t+1) ← ClipToBounds(M_i^(t+1), hyperparameter_space)
        END FOR

        # Check convergence
        IF |Fitness(F_best^t) - Fitness(F_best^(t-1))| < 0.001 THEN:
            convergence_count ← convergence_count + 1
            IF convergence_count ≥ 5 THEN:
                BREAK   # Converged
            END IF
        END IF
    END FOR

    # Step 5: Return best solution
    H_best ← F[1]   # Best flame = optimal hyperparameters
    RETURN H_best
END FUNCTION

# STAGE 5: FEATURE FUSION
FUNCTION FeatureFusion(F_MRI, F_PET, F_Neuro):
    INPUT:
        F_MRI: Features from MRI (size: 128×128×64)
        F_PET: Features from PET (size: 128×128×64)
        F_Neuro: Features from neuropsych (size: 128×128×10)
    OUTPUT:
        F_fused: Concatenated multimodal features

    # Concatenate along feature dimension
    F_fused ← Concatenate([F_MRI, F_PET, F_Neuro], axis=-1)
    # Resulting size: 128×128×(64+64+10) = 128×128×138

    # Flatten for classification
    F_fused ← Flatten(F_fused)

    RETURN F_fused
END FUNCTION

# STAGE 6: BASE CLASSIFIERS
FUNCTION Train_SVM(F_fused, y):
    INPUT: F_fused (fused features), y (labels)
    OUTPUT: SVM_model

    # Train one-vs-rest SVM with RBF kernel
    FOR k in {0, 1, 2} DO:   # AD, MCI, CN
        y_binary ← (y == k)   # Binary labels for class k
        SVM_k ← Train_SVM_Binary(F_fused, y_binary, kernel='rbf', C=1.0, gamma='scale')
    END FOR

    # Decision function (Equation 10, 11)
    FUNCTION Predict(F_new):
        FOR k in {0, 1, 2} DO:
            f_k(F_new) ← w_k^T × Φ(F_new) + b_k
        END FOR
        ŷ ← argmax_k f_k(F_new)
        RETURN ŷ
    END FUNCTION

    RETURN SVM_model
END FUNCTION

FUNCTION Train_kNN(F_fused, y):
    INPUT: F_fused, y
    OUTPUT: kNN_model

    k ← 5   # Number of neighbors
    distance_metric ← 'euclidean'

    # Store training data (lazy learning)
    kNN_model.training_data ←
F_fused kNN_model.training_labels ← y FUNCTION Predict(F_new): # Compute distances to all training samples (Equation 9) FOR i = 1 TO n_train DO: dist_i ← sqrt(Σ(F_new_j - F_fused_i_j)²) END FOR # Find k nearest neighbors neighbors ← GetTopK(distances, k=5) # Majority voting (Equation 12) ŷ ← Mode(y[neighbors]) RETURN ŷ END FUNCTION RETURN kNN_model END FUNCTION FUNCTION Train_RandomForest(F_fused, y): INPUT: F_fused, y OUTPUT: RF_model n_estimators ← 1000 max_depth ← None min_samples_split ← 2 # Train ensemble of decision trees FOR t = 1 TO n_estimators DO: # Bootstrap sample F_bootstrap, y_bootstrap ← BootstrapSample(F_fused, y) # Train decision tree tree_t ← TrainDecisionTree(F_bootstrap, y_bootstrap, max_depth=max_depth, min_samples_split=min_samples_split) RF_model.trees[t] ← tree_t END FOR FUNCTION Predict(F_new): # Collect predictions from all trees FOR t = 1 TO n_estimators DO: y_t ← tree_t.Predict(F_new) END FOR # Majority voting (Equation 12) ŷ ← Mode(y_1, y_2, ..., y_n_estimators) RETURN ŷ END FUNCTION RETURN RF_model END FUNCTION # STAGE 7: WEIGHTED STACKING ENSEMBLE FUNCTION WeightedStackingEnsemble(models, F_val, y_val): INPUT: models: {SVM_model, kNN_model, RF_model} F_val, y_val: Validation set for weight optimization OUTPUT: ensemble_model with optimized weights # Grid search for optimal weights (Section 3.6.6.1) weight_range ← [0.1, 0.2, ..., 0.9] best_accuracy ← 0 best_weights ← None FOR w_SVM in weight_range DO: FOR w_kNN in weight_range DO: FOR w_RF in weight_range DO: IF w_SVM + w_kNN + w_RF == 1.0 THEN: # Test current weights accuracy ← EvaluateWeights(models, F_val, y_val, [w_SVM, w_kNN, w_RF]) IF accuracy > best_accuracy THEN: best_accuracy ← accuracy best_weights ← [w_SVM, w_kNN, w_RF] END IF END IF END FOR END FOR END FOR # Store optimal weights ensemble_model.weights ← best_weights ensemble_model.models ← models FUNCTION Predict(F_new): # Get predictions from base classifiers ŷ_SVM ← SVM_model.Predict(F_new) ŷ_kNN ← 
kNN_model.Predict(F_new) ŷ_RF ← RF_model.Predict(F_new) # Get probability distributions (calibrated) p_SVM ← SVM_model.PredictProba(F_new) # Size: 3 (AD, MCI, CN) p_kNN ← kNN_model.PredictProba(F_new) p_RF ← RF_model.PredictProba(F_new) # Weighted averaging (Equation 13) p_ensemble ← w_SVM × p_SVM + w_kNN × p_kNN + w_RF × p_RF # Final prediction ŷ_final ← argmax(p_ensemble) RETURN ŷ_final, p_ensemble END FUNCTION RETURN ensemble_model END FUNCTION # STAGE 8: HANDLING MISSING MODALITIES FUNCTION HandleMissingModalities(X_MRI, X_PET, X_NP, available_modalities): INPUT: X_MRI, X_PET, X_NP: Input data (may contain None values) available_modalities: Boolean flags {has_MRI, has_PET, has_NP} OUTPUT: F_fused: Fused features with missing modality handling # Zero-filling strategy for missing modalities IF NOT available_modalities['MRI'] THEN: F_MRI ← Zeros(shape=(128, 128, 64)) ELSE: F_MRI ← UNetPP_SA(X_MRI, H_optimal) END IF IF NOT available_modalities['PET'] THEN: F_PET ← Zeros(shape=(128, 128, 64)) ELSE: F_PET ← UNetPP_SA(X_PET, H_optimal) END IF IF NOT available_modalities['NP'] THEN: F_Neuro ← Zeros(shape=(128, 128, 10)) ELSE: F_Neuro ← DenseNet_Neuropsych(X_NP) END IF # Optionally: Mean imputation from training set # (Tested in ablation studies, causes 2-3% accuracy drop) # Create modality presence mask mask ← [available_modalities['MRI'], available_modalities['PET'], available_modalities['NP']] # Concatenate with mask F_fused ← Concatenate([F_MRI, F_PET, F_Neuro, mask]) # Adjust ensemble weights based on available modalities IF NOT available_modalities['PET'] THEN: # Increase weight for MRI-based predictions w_SVM ← w_SVM × 1.2 w_RF ← w_RF × 1.1 # Renormalize weights ← [w_SVM, w_kNN, w_RF] / sum([w_SVM, w_kNN, w_RF]) END IF RETURN F_fused END FUNCTION # MAIN TRAINING PIPELINE FUNCTION Train_UNetPP_SA_MFO_Framework(): # Load and split data X_MRI, X_PET, X_NP, y ← LoadADNIDataset() # Stratified split (70% train, 15% val, 15% test) X_train, y_train ← StratifiedSplit(X, 
y, ratio=0.70) X_val, y_val ← StratifiedSplit(X_remaining, y_remaining, ratio=0.50) X_test, y_test ← X_remaining, y_remaining # Stage 1: Preprocessing X_MRI_train ← Preprocess_Images(X_MRI_train, 'MRI') X_PET_train ← Preprocess_Images(X_PET_train, 'PET') X_NP_train ← Preprocess_Neuropsych(X_NP_train) # Similarly for validation and test sets X_MRI_val ← Preprocess_Images(X_MRI_val, 'MRI') X_PET_val ← Preprocess_Images(X_PET_val, 'PET') X_NP_val ← Preprocess_Neuropsych(X_NP_val) # Stage 3: Hyperparameter Optimization via MFO H_optimal ← MothFlameOptimization(UNetPP_SA, X_train, y_train, X_val, y_val) PRINT "Optimal hyperparameters:", H_optimal # Stage 2: Feature Extraction with Optimized UNet++(SA) # Initialize optimized model UNetPP_SA_optimized ← BuildModel(UNetPP_SA, H_optimal) # Apply 5-fold cross-validation with regularization FOR fold = 1 TO 5 DO: X_fold_train, y_fold_train ← GetFold(X_train, y_train, fold) X_fold_val, y_fold_val ← GetValidationFold(X_train, y_train, fold) # Data augmentation X_MRI_aug ← AugmentData(X_MRI_fold_train, rotation_range=10, flip=True, intensity_scaling=0.1) X_PET_aug ← AugmentData(X_PET_fold_train, rotation_range=10, flip=True, intensity_scaling=0.1) # Extract features from each modality FOR each sample in X_fold_train DO: F_MRI ← UNetPP_SA_optimized(X_MRI_sample) F_PET ← UNetPP_SA_optimized(X_PET_sample) F_Neuro ← DenseNet_Neuropsych(X_NP_sample) # Stage 4: Feature Fusion F_fused_sample ← FeatureFusion(F_MRI, F_PET, F_Neuro) F_fused_train.append(F_fused_sample) END FOR # Regularization: Dropout (rate=0.3) and L2 (lambda=0.01) # Already applied in UNetPP_SA architecture # Evaluate fold performance fold_accuracy ← Evaluate(UNetPP_SA_optimized, X_fold_val, y_fold_val) PRINT "Fold", fold, "Accuracy:", fold_accuracy END FOR # Final training on full training set FOR each sample in X_train DO: F_MRI ← UNetPP_SA_optimized(X_MRI_sample) F_PET ← UNetPP_SA_optimized(X_PET_sample) F_Neuro ← DenseNet_Neuropsych(X_NP_sample) F_fused_sample ← 
FeatureFusion(F_MRI, F_PET, F_Neuro)
        F_fused_train.append(F_fused_sample)
    END FOR

    # Extract features for validation set
    FOR each sample in X_val DO:
        F_MRI ← UNetPP_SA_optimized(X_MRI_sample)
        F_PET ← UNetPP_SA_optimized(X_PET_sample)
        F_Neuro ← DenseNet_Neuropsych(X_NP_sample)
        F_fused_sample ← FeatureFusion(F_MRI, F_PET, F_Neuro)
        F_fused_val.append(F_fused_sample)
    END FOR

    # Stage 5: Train Base Classifiers
    SVM_model ← Train_SVM(F_fused_train, y_train)
    kNN_model ← Train_kNN(F_fused_train, y_train)
    RF_model ← Train_RandomForest(F_fused_train, y_train)

    # Stage 6: Weighted Stacking Ensemble
    models ← {SVM_model, kNN_model, RF_model}
    ensemble_model ← WeightedStackingEnsemble(models, F_fused_val, y_val)
    PRINT "Optimal ensemble weights:", ensemble_model.weights

    RETURN UNetPP_SA_optimized, ensemble_model
END FUNCTION

# INFERENCE PIPELINE
FUNCTION Predict_AD(X_MRI_new, X_PET_new, X_NP_new, UNetPP_SA_model, ensemble_model):
    INPUT:
        X_MRI_new, X_PET_new, X_NP_new: New patient data
        UNetPP_SA_model: Trained feature extractor
        ensemble_model: Trained ensemble classifier
    OUTPUT:
        y_pred: Predicted class {0: AD, 1: MCI, 2: CN}
        confidence: Prediction probabilities

    # Preprocessing
    # NOTE(review): the missing-modality check below tests the raw inputs for
    # None, so preprocessing must be skipped (or guarded) for any modality that
    # is absent — as written, Preprocess_* would be applied to a None input.
    # Confirm the intended order against the implementation.
    X_MRI_prep ← Preprocess_Images(X_MRI_new, 'MRI')
    X_PET_prep ← Preprocess_Images(X_PET_new, 'PET')
    X_NP_prep ← Preprocess_Neuropsych(X_NP_new)

    # Check for missing modalities
    available ← {
        'MRI': X_MRI_new is not None,
        'PET': X_PET_new is not None,
        'NP': X_NP_new is not None
    }

    # Feature extraction with missing modality handling
    F_fused ← HandleMissingModalities(X_MRI_prep, X_PET_prep, X_NP_prep, available)

    # Ensemble prediction
    y_pred, confidence ← ensemble_model.Predict(F_fused)

    # Map to class labels
    class_names ← {0: "Alzheimer's Disease",
                   1: "Mild Cognitive Impairment",
                   2: "Cognitively Normal"}
    PRINT "Prediction:", class_names[y_pred]
    PRINT "Confidence: AD={:.2f}%, MCI={:.2f}%, CN={:.2f}%".format(
        confidence[0]*100, confidence[1]*100, confidence[2]*100)

    RETURN y_pred, confidence
END FUNCTION

# EVALUATION METRICS
FUNCTION EvaluateModel(y_true, y_pred, y_proba): INPUT: y_true: Ground truth labels y_pred: Predicted labels y_proba: Prediction probabilities OUTPUT: metrics: Dictionary of evaluation metrics # Confusion matrix CM ← ConfusionMatrix(y_true, y_pred) # Per-class metrics FOR class k in {0, 1, 2} DO: TP_k ← CM[k, k] FP_k ← sum(CM[:, k]) - TP_k FN_k ← sum(CM[k, :]) - TP_k TN_k ← sum(CM) - TP_k - FP_k - FN_k Precision_k ← TP_k / (TP_k + FP_k) Recall_k ← TP_k / (TP_k + FN_k) F1_k ← 2 × (Precision_k × Recall_k) / (Precision_k + Recall_k) Specificity_k ← TN_k / (TN_k + FP_k) END FOR # Overall metrics Accuracy ← sum(diagonal(CM)) / sum(CM) Precision_macro ← Mean(Precision_0, Precision_1, Precision_2) Recall_macro ← Mean(Recall_0, Recall_1, Recall_2) F1_macro ← Mean(F1_0, F1_1, F1_2) # AUC-ROC (One-vs-Rest strategy) FOR class k in {0, 1, 2} DO: # Binary classification: class k vs. rest y_binary ← (y_true == k) AUC_k ← ComputeAUC(y_binary, y_proba[:, k]) END FOR AUC_ROC_macro ← Mean(AUC_0, AUC_1, AUC_2) metrics ← { 'Accuracy': Accuracy, 'Precision': Precision_macro, 'Recall': Recall_macro, 'F1-Score': F1_macro, 'AUC-ROC': AUC_ROC_macro } RETURN metrics END FUNCTION # MAIN EXECUTION PROCEDURE Main(): PRINT "=== UNet++(SA)–MFO Framework for AD Prediction ===" # Training phase PRINT "\n[1] Training phase..." UNetPP_SA_model, ensemble_model ← Train_UNetPP_SA_MFO_Framework() # Load test data X_MRI_test, X_PET_test, X_NP_test, y_test ← LoadTestData() # Extract features for test set F_fused_test ← [] FOR each sample in X_test DO: F_MRI ← UNetPP_SA_model(X_MRI_sample) F_PET ← UNetPP_SA_model(X_PET_sample) F_Neuro ← DenseNet_Neuropsych(X_NP_sample) F_fused_sample ← FeatureFusion(F_MRI, F_PET, F_Neuro) F_fused_test.append(F_fused_sample) END FOR # Prediction on test set PRINT "\n[2] Evaluation on test set..." 
y_pred, y_proba ← ensemble_model.Predict(F_fused_test) # Evaluate performance metrics ← EvaluateModel(y_test, y_pred, y_proba) PRINT "\n=== Final Results ===" PRINT "Accuracy: ", metrics['Accuracy'], "%" PRINT "Precision: ", metrics['Precision'], "%" PRINT "Recall: ", metrics['Recall'], "%" PRINT "F1-Score: ", metrics['F1-Score'], "%" PRINT "AUC-ROC: ", metrics['AUC-ROC'], "%" END PROCEDURE END ALGORITHM