# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Generate synthetic dataset
# Seed NumPy's global RNG so every random draw in this script is repeatable.
np.random.seed(42)
companies = [
    "JPMorgan Chase & Co.", "Samsung Electronics", "Intel Corporation", "Apple Inc.",
    "Berkshire Hathaway Inc.", "Home Depot, Inc.", "Johnson & Johnson", "Roche Holding AG",
    "Amazon.com Inc.", "UnitedHealth Group Incorporated", "Toyota Motor Corporation",
    "Alphabet Inc.", "NVIDIA Corporation", "Microsoft Corporation", "Nestle SA",
    "Visa Inc.", "Procter & Gamble Co.", "Mastercard Incorporated", "Walmart Inc.",
    "Facebook, Inc.", "Pfizer Inc.", "Cisco Systems, Inc.", "Exxon Mobil Corporation",
    "Chevron Corporation", "Pepsico, Inc.", "The Coca-Cola Company", "Bank of America Corporation",
    "Wells Fargo & Company", "AT&T Inc.", "Comcast Corporation", "Merck & Co., Inc.",
    # "L'Oréal" was previously stored as mojibake ("L'Or√©al"); the text is now correct UTF-8.
    "Abbott Laboratories", "AbbVie Inc.", "L'Oréal S.A.", "Unilever N.V.", "Prologis, Inc.",
    "Morgan Stanley", "Citigroup Inc.", "General Electric Company",
    "International Business Machines Corporation (IBM)", "Oracle Corporation", "Broadcom Inc.",
    "Qualcomm Inc.", "Texas Instruments Incorporated", "Salesforce.com, Inc.",
    "PayPal Holdings, Inc.", "Netflix, Inc.", "Adobe Inc.", "ASML Holding N.V.", "Tesla, Inc.",
    "General Motors Company", "Ford Motor Company", "Honda Motor Co., Ltd.",
    "Bayerische Motoren Werke AG (BMW)", "Daimler AG", "Nissan Motor Co., Ltd.",
    "Volkswagen AG", "Peugeot S.A.", "Renault S.A."
]
n_samples = 100
# In-place shuffle. It also advances the global RNG, so removing it would
# change every value drawn below.
np.random.shuffle(companies)
dataset = pd.DataFrame({
    # Drawn with replacement (np.random.choice default), so names may repeat.
    'Company Name': np.random.choice(companies, n_samples),
    # One month-end date per row. The MonthEnd offset object yields the same
    # dates as the 'M' alias, which pandas deprecated in favour of 'ME';
    # using the object avoids depending on either alias spelling.
    'Report Date': pd.date_range(start='1/1/2020', periods=n_samples, freq=pd.offsets.MonthEnd()),
    # Financial metrics: independent Gaussian draws given as (mean, std, size).
    # The draws are unbounded, so an occasional negative value is possible.
    'Revenue': np.random.normal(3e7, 1e7, n_samples),
    'Net Income': np.random.normal(1e7, 5e6, n_samples),
    'Earnings Per Share (EPS)': np.random.normal(5, 2, n_samples),
    'Total Assets': np.random.normal(5e7, 2e7, n_samples),
    'Total Liabilities': np.random.normal(3e7, 1.5e7, n_samples),
    'Operating Income': np.random.normal(1.5e7, 7e6, n_samples),
    'Cash Flow from Operations': np.random.normal(1e7, 5e6, n_samples),
    # One of five canned sentences per row, chosen uniformly at random.
    'Textual Disclosures': np.random.choice([
        "The company achieved remarkable growth due to new product launches.",
        "Revenue growth was driven by strategic investments.",
        "Net income exceeded analyst expectations, boosted by strong operational performance.",
        "The firm entered several new markets, expanding its global footprint.",
        "Investment in research and development remained high, underpinning future growth."
    ], n_samples)
})

# Data Preprocessing
# Numeric columns fed to scaling, PCA, clustering and the regressor.
features = ['Revenue', 'Net Income', 'Earnings Per Share (EPS)', 'Total Assets',
            'Total Liabilities', 'Operating Income', 'Cash Flow from Operations']
# Standardize each feature (zero mean, unit variance) so that columns measured
# in tens of millions do not dominate EPS in the distance-based steps below.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(dataset[features])

# PCA for visualization
# Project onto the first two principal components purely for 2-D plotting;
# clustering below still runs on the full standardized feature space.
pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_scaled)
dataset['PCA1'] = principal_components[:, 0]
dataset['PCA2'] = principal_components[:, 1]

# KMeans Clustering
# n_init is set explicitly: its default changed between scikit-learn releases
# (and omitting it triggers a FutureWarning on some of them), so pinning it
# keeps the cluster assignment independent of the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
dataset['Cluster'] = kmeans.fit_predict(X_scaled)

# Visualization: Clustering
# Scatter of the two principal components, coloured by KMeans cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=dataset,
    x='PCA1',
    y='PCA2',
    hue='Cluster',
    palette='viridis',
    ax=ax,
)
ax.set_title('Clustering of Financial Metrics')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.legend(title='Cluster')
plt.show()

# Feature Importance using RandomForestRegressor
X = dataset.loc[:, features]
# The target is pure Gaussian noise (illustration only), so the importances
# computed below carry no real predictive meaning.
y = np.random.normal(loc=0, scale=1, size=n_samples)
model = RandomForestRegressor().fit(X, y)
importance = model.feature_importances_
# One row per feature, in the same order as `features`.
feature_importance_df = pd.DataFrame(
    list(zip(features, importance)),
    columns=['Feature', 'Importance'],
)

# Visualization: Feature Importance
# Horizontal bars, most important feature first.
ranked_importance = feature_importance_df.sort_values(by='Importance', ascending=False)
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ranked_importance, x='Importance', y='Feature', ax=ax)
ax.set_title('Feature Importance Rankings (HFRP)')
plt.show()

# Loss plot for training and testing (synthetic values)
# Both curves are straight lines produced by np.linspace, not measured losses.
n_epochs = 50
epochs = np.arange(1, n_epochs + 1)
training_loss = np.linspace(0.05, 0.0013, num=n_epochs)
testing_loss = np.linspace(0.06, 0.003, num=n_epochs)

fig, ax = plt.subplots(figsize=(10, 5))
for curve, curve_label, curve_color in (
    (training_loss, 'Training Loss', 'blue'),
    (testing_loss, 'Testing Loss', 'green'),
):
    ax.plot(epochs, curve, label=curve_label, color=curve_color)
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Training and Testing Loss (HFRP)')
ax.legend()
plt.show()

# Actual vs Predicted plots (random data for illustration)
# `actual` is a Series with one mean per feature, labelled by feature name.
actual = X.mean(axis=0)
# "Predictions" are the means perturbed by multiplicative Gaussian noise
# (standard deviation 2%); no model is involved.
predicted = actual * (1 + np.random.normal(0, 0.02, size=actual.shape))

plt.figure(figsize=(15, 10))
for i, feature in enumerate(features):
    # Look values up by label. Integer keys on a label-indexed Series rely on
    # a positional fallback that pandas has deprecated.
    actual_value = actual[feature]
    predicted_value = predicted[feature]
    low = min(actual_value, predicted_value)
    high = max(actual_value, predicted_value)

    plt.subplot(3, 3, i+1)
    plt.scatter(actual_value, predicted_value, color='blue')
    # Vertical segment from the ideal point (actual, actual) to the plotted
    # point (actual, predicted), i.e. the prediction error.
    plt.plot([actual_value, actual_value], [actual_value, predicted_value], 'r--')
    plt.title(f'Actual vs Predicted {feature} (HFRP)')
    plt.xlabel(f'Actual {feature}')
    plt.ylabel(f'Predicted {feature}')
    # Segment of the y = x line spanning the two values.
    plt.plot([low, high], [low, high], 'r--', label='Ideal')
    # Without a legend the 'Ideal' label above is never displayed.
    plt.legend()
plt.tight_layout()
plt.show()

# Risk analysis: Risk score distribution and reduction percentage
# Scores are synthetic: "before" is uniform in [0.6, 0.8) and each score is
# then lowered by a uniform amount in [0.2, 0.4).
risk_factors = ['Credit Risk', 'Liquidity Risk', 'Market Risk', 'Operational Risk']
n_risks = len(risk_factors)
risk_before = np.random.uniform(low=0.6, high=0.8, size=n_risks)
risk_after = risk_before - np.random.uniform(low=0.2, high=0.4, size=n_risks)
# Relative drop of each score, expressed as a percentage of its starting value.
risk_reduction = (risk_before - risk_after) / risk_before * 100

fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(risk_factors, risk_reduction, color='teal')
ax.set_title('Risk Reduction Percentage by Risk Type')
ax.set_xlabel('Risk Type')
ax.set_ylabel('Risk Reduction Percentage')
plt.show()

# Detailed Result Tables
# `actual` and `predicted` are Series labelled by feature name, so the frame
# takes that index; 'Metric' repeats the same names as a regular column.
result_columns = {}
result_columns['Metric'] = features
result_columns['Actual Value'] = actual
result_columns['Predicted Value'] = predicted
results_df = pd.DataFrame(result_columns)

print("Detailed Results:", results_df, sep="\n")
