# -*- coding: utf-8 -*-
"""plantstresscode.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1tEQ745x8hJs2k_hvL-HnUXOk4yqQhUw-
"""

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from Bio import SeqIO
from Bio.SeqUtils import ProtParam
#from Bio.Alphabet import  generic_protein
#from Bio.Seq import Seq
import numpy as np

def extract_pssm_features(pssm_file):
    """Read the numeric block of a PSSM text file into a NumPy array.

    The first three and the last six lines of the file are discarded.
    From every remaining line, the whitespace-separated tokens at
    positions 22 through 41 (zero-based) are parsed as floats.

    NOTE(review): in PSI-BLAST ASCII PSSM output those positions would be
    the 20 weighted-observed-percentage columns -- confirm against the
    files actually used.

    Args:
        pssm_file: Path of the PSSM text file to read.

    Returns:
        numpy.ndarray with one row per retained line and one column per
        parsed token.
    """
    with open(pssm_file) as handle:
        body = handle.readlines()[3:-6]

    rows = [
        [float(token) for token in record.split()[22:42]]
        for record in body
    ]
    return np.array(rows)
# Convert PSSM features to a numpy array
# NOTE(review): `pssm_features` is not assigned anywhere in the visible file
# before this line; it presumably should come from extract_pssm_features()
# -- confirm and define it, otherwise this raises NameError.
X = np.array(pssm_features)

# Convert labels to a numpy array
# NOTE(review): `labels` is only assigned further down in the visible file
# (from the CSV), so it is also unbound here when the script runs top to
# bottom -- confirm the intended source of the labels.
y = np.array(labels)

# Fixed typo: this line read `rom Bio import SeqIO`, a SyntaxError that
# prevented the whole file from being parsed.
from Bio import SeqIO
import numpy as np

def extract_fasta_features(fasta_file):
    """Build per-residue physicochemical features for one FASTA sequence.

    Every residue of the sequence is looked up in three module-level
    tables (``hydrophobicity_dict``, ``polarity_dict`` and ``charge_dict``)
    and the three values form one row of the result.

    NOTE(review): those three lookup tables are not defined in the visible
    part of this file; they must exist at module level before this
    function is called.

    Args:
        fasta_file: Path (or open handle) of a FASTA file. It is read with
            ``SeqIO.read``, which expects exactly one record.

    Returns:
        numpy.ndarray of shape ``(len(sequence), 3)`` whose columns are
        hydrophobicity, polarity and charge, in that order.

    Raises:
        ValueError: Raised by ``SeqIO.read`` when the file does not hold
            exactly one record.
        KeyError: If a residue letter is missing from one of the tables.
    """
    # Read the file the caller asked for; the path used to be hard-coded
    # here, which silently ignored the ``fasta_file`` argument.
    fasta_record = SeqIO.read(fasta_file, "fasta")
    seq = str(fasta_record.seq)

    hydrophobicity = [hydrophobicity_dict[aa] for aa in seq]
    polarity = [polarity_dict[aa] for aa in seq]
    charge = [charge_dict[aa] for aa in seq]

    # column_stack accepts plain lists; the previous code called
    # ``.reshape`` on Python lists, which raises AttributeError.
    return np.column_stack((hydrophobicity, polarity, charge))
# `features` was never bound at module level at this point (it is only a
# local inside extract_fasta_features), so printing it raised NameError.
# Call the extractor on the plant sequence file and print its result.
# NOTE(review): path taken from the extractor's original body -- confirm.
features = extract_fasta_features("/content/Plant_sequences.fasta")
print(features)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Bidirectional, LSTM

# Load the data from a CSV file (no header row; every row is one sample)
data = pd.read_csv("/content/drive/MyDrive/stressinput.csv", header=None)

# Split the data into features (all columns but the last) and labels
# (the last column)
features = data.iloc[:, :-1].values
labels = data.iloc[:, -1].values

# Split the data into training and test sets BEFORE scaling, so that the
# scaler statistics are estimated from the training rows only. Fitting the
# scaler on the full data set leaks test-set mean/variance into training.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Scale the features using a StandardScaler fitted on the training split
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
test_features = scaler.transform(test_features)

# Keep `features` holding scaled values, as it did before, using the same
# training-set statistics.
features = scaler.transform(features)

# Define the CNN layers for feature extraction
# NOTE(review): this model is never compiled or trained in the visible
# file, so the extracted features come from randomly initialised filters
# and vary between runs unless the framework seed is fixed -- confirm
# that this is intended.
cnn_model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(train_features.shape[1], 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten()
])

# Reshape the data for CNN input: (samples, feature_count, 1 channel)
train_features_cnn = train_features.reshape(train_features.shape[0], train_features.shape[1], 1)
test_features_cnn = test_features.reshape(test_features.shape[0], test_features.shape[1], 1)

# Extract features using CNN layers (output is flattened to 2-D by Flatten)
train_features_cnn = cnn_model.predict(train_features_cnn)
test_features_cnn = cnn_model.predict(test_features_cnn)

# Define the RBF network (same as provided)
# Define the RBF network
class RBFNet:
    """Radial-basis-function network with k-means centres.

    ``fit`` places ``hidden_dim`` Gaussian units at the k-means cluster
    centres of the training data, derives one shared width ``sigma`` from
    the largest centre-to-centre distance, and solves for the output
    weights (plus a bias) by linear least squares. ``predict`` returns the
    weighted sum of the hidden activations and the bias.

    Args:
        input_dim: Stored on the instance; not used by the computations.
        output_dim: Stored on the instance; not used by the computations.
        hidden_dim: Number of RBF units (k-means clusters).
        random_state: Optional seed forwarded to ``KMeans`` so the centres
            are reproducible. ``None`` keeps the previous unseeded
            behaviour.
    """

    def __init__(self, input_dim, output_dim, hidden_dim, random_state=None):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.random_state = random_state
        # Learned state, populated by fit().
        self.centers = None
        self.weights = None
        self.sigma = None

    def fit(self, X, y):
        """Learn centres, width and output weights from ``X`` and ``y``.

        Args:
            X: 2-D array, one sample per row.
            y: Targets, one per row of ``X``.
        """
        kmeans = KMeans(n_clusters=self.hidden_dim, random_state=self.random_state)
        kmeans.fit(X)
        self.centers = kmeans.cluster_centers_

        # Calculate the width parameter shared by all RBFs
        self.sigma = self._width_from_centers(self.centers)

        # Solve for the weights (last entry is the bias) by least squares
        design = self._design_matrix(X)
        self.weights = np.linalg.lstsq(design, y, rcond=None)[0]

    def predict(self, X):
        """Return the network output for every row of ``X``.

        Raises:
            RuntimeError: If the network has not been fitted yet.
        """
        if self.centers is None or self.weights is None or self.sigma is None:
            raise RuntimeError("RBFNet.predict() called before fit()")
        return np.dot(self._design_matrix(X), self.weights)

    def rbf(self, x, c):
        """Gaussian activation of a single sample ``x`` for centre ``c``."""
        return np.exp(-np.linalg.norm(x - c) ** 2 / (2 * self.sigma ** 2))

    def _width_from_centers(self, centers):
        """Return ``dmax / sqrt(2 * hidden_dim)`` with a safe fallback.

        ``dmax`` is the largest pairwise distance between centres. When it
        is zero (a single centre, or all centres coincide) the formula
        would give ``sigma == 0`` and every activation would become NaN,
        so a width of 1.0 is used instead.
        """
        centers = np.asarray(centers, dtype=float)
        # Pairwise squared distances via the Gram matrix: only an
        # (h x h) temporary instead of one Python-level norm per pair.
        sq_norms = np.einsum("ij,ij->i", centers, centers)
        sq_dists = sq_norms[:, None] + sq_norms[None, :] - 2.0 * (centers @ centers.T)
        # Rounding can leave tiny negative values; clamp before the sqrt.
        dmax = np.sqrt(max(float(sq_dists.max()), 0.0))
        sigma = dmax / np.sqrt(2 * self.hidden_dim)
        return sigma if sigma > 0 else 1.0

    def _design_matrix(self, X):
        """Hidden-layer activations for ``X`` with a trailing bias column."""
        X = np.asarray(X)
        n_samples = X.shape[0]
        activations = np.empty((n_samples, self.hidden_dim))
        two_sigma_sq = 2 * self.sigma ** 2
        # One vectorised pass per centre keeps memory at O(n * d) while
        # avoiding the per-sample Python loop.
        for j in range(self.hidden_dim):
            dist = np.linalg.norm(X - self.centers[j], axis=1)
            activations[:, j] = np.exp(-dist ** 2 / two_sigma_sq)
        return np.concatenate((activations, np.ones((n_samples, 1))), axis=1)

# Build an RBF network sized to the CNN feature width and train it on the
# CNN-extracted training features
rbf = RBFNet(
    input_dim=train_features_cnn.shape[1],
    output_dim=1,
    hidden_dim=50,
)
rbf.fit(train_features_cnn, train_labels)

# RBF outputs for both splits
train_rbf_predictions = rbf.predict(train_features_cnn)
test_rbf_predictions = rbf.predict(test_features_cnn)

# Append the RBF output as one extra column after the CNN features
train_features_with_rbf = np.hstack(
    (train_features_cnn, train_rbf_predictions.reshape(-1, 1))
)
test_features_with_rbf = np.hstack(
    (test_features_cnn, test_rbf_predictions.reshape(-1, 1))
)

# Add a trailing axis of length 1 so the Bi-LSTM receives
# (samples, steps, 1)
train_features_with_rbf = train_features_with_rbf[:, :, np.newaxis]
test_features_with_rbf = test_features_with_rbf[:, :, np.newaxis]

# Bi-LSTM classifier: three stacked bidirectional LSTM layers followed by
# a single sigmoid unit
bi_lstm_model = Sequential()
bi_lstm_model.add(
    Bidirectional(
        LSTM(64, return_sequences=True),
        input_shape=train_features_with_rbf.shape[1:],
    )
)
bi_lstm_model.add(Bidirectional(LSTM(128, return_sequences=True)))
bi_lstm_model.add(Bidirectional(LSTM(128)))
bi_lstm_model.add(Dense(1, activation='sigmoid'))

# Binary cross-entropy with Adam, tracking accuracy
bi_lstm_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for five epochs; the test split is passed as validation data
bi_lstm_model.fit(
    train_features_with_rbf,
    train_labels,
    epochs=5,
    validation_data=(test_features_with_rbf, test_labels),
)

# Loss and accuracy on each split
train_loss, train_accuracy = bi_lstm_model.evaluate(
    train_features_with_rbf, train_labels
)
test_loss, test_accuracy = bi_lstm_model.evaluate(
    test_features_with_rbf, test_labels
)

print("Hybrid Model (CNN + RBF + Bi-LSTM):")
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
# The code below uses `confusion_matrix`, `plt` and `sns`, none of which
# were imported anywhere in the visible file (NameError). Import them here,
# next to their only use, following the per-cell import style of this file.
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Evaluate the Bi-LSTM model on the test data and obtain predictions;
# sigmoid outputs above 0.5 are mapped to class 1, the rest to class 0
test_predictions = bi_lstm_model.predict(test_features_with_rbf)
test_predictions = (test_predictions > 0.5).astype(int)

# Compute the confusion matrix for the test predictions.
# labels=[0, 1] pins the matrix to 2x2 even when one class is absent from
# the test split, so the [row, col] indexing below cannot go out of range.
# NOTE(review): assumes the CSV labels are encoded as 0/1, consistent with
# the sigmoid + binary_crossentropy setup -- confirm.
conf_matrix = confusion_matrix(test_labels, test_predictions.ravel(), labels=[0, 1])

# Create a heatmap for the confusion matrix
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False,
            xticklabels=["Predicted Negative", "Predicted Positive"],
            yticklabels=["Actual Negative", "Actual Positive"])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix - Hybrid Model (CNN + RBF + Bi-LSTM)")
plt.show()

print("Hybrid Model Confusion Matrix:")
print(conf_matrix)

# Rows are true classes, columns are predicted classes
true_positive = conf_matrix[1, 1]
false_positive = conf_matrix[0, 1]
true_negative = conf_matrix[0, 0]
false_negative = conf_matrix[1, 0]

# Guard the ratios: with no actual positives (or negatives) in the test
# split the denominator is zero, so report NaN explicitly instead of
# dividing by zero.
actual_positives = true_positive + false_negative
actual_negatives = true_negative + false_positive
sensitivity = true_positive / actual_positives if actual_positives else float("nan")
specificity = true_negative / actual_negatives if actual_negatives else float("nan")

print("Sensitivity:", sensitivity)
print("Specificity:", specificity)