%pip install -r ../requirements.txt
%load_ext autoreload
%autoreload 2

# Import necessary libraries
import sys
import os
import shutil
import joblib
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

sys.path.append(os.path.abspath(os.path.join(os.pardir, 'src')))

# Import project-specific internal modules
from preprocessor import DataPreprocessor
from src.plots import PlotGenerator
from src import config
from config import RUN_CONFIGURATION, EMOTION_STATES, NLP_CONFIG, SENTIMENT_MODEL_EXPORT_PATH_RAW, \
    SENTIMENT_MODEL_EXPORT_PATH_OPTIMIZED, EMOTION_VARIATIONS_PATH, NEGATION_PATTERNS_PATH, \
    HYPERPARAMETERS, RANDOM_STATE, PREDICTION_MODEL_EXPORT_PATH, TOPIC_MODEL_EXPORT_PATH, \
    is_step_enabled

# Unsupervised learning imports
from clustering import ClusteringAnalysis

# Supervised learning imports
from src.no_show_prediction import NoShowPredictionModel

# NLP imports
from src.sentiment_analysis import SentimentAnalysisModel
from src.emotion_postprocessor import EmotionPostProcessor
from src.clinical_notes_prediction import ClinicalNotesNoShowPredictor
from src.clinical_topic_model import ClinicalTopicModel

sns.set(style='whitegrid')
warnings.filterwarnings("ignore")
plt.style.use('default')
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Create an instance of the preprocessing and plotting classes
preprocessor = DataPreprocessor(config)
plotter = PlotGenerator(style='whitegrid', palette='viridis', figsize=(10, 6))

if is_step_enabled('dataload'):
    df = preprocessor.load_data(config.DATASET_PATH)
    display("shape:", df.shape)
    display("columns:", df.columns)
    display(df.head())
    display(df.describe())

'shape:'

(110527, 17)

'columns:'

Index(['PatientId', 'AppointmentID', 'Gender', 'ScheduledDay',
       'AppointmentDay', 'Age', 'Neighbourhood', 'Scholarship', 'Hypertension',
       'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received', 'No-show',
       'PatientNotes', 'PatientSentiment', 'NoShowReason'],
      dtype='object')

if is_step_enabled('data_preprocess'):
    df = preprocessor.preprocess_data(df)
    display("shape:", df.shape)
    display("columns:", df.columns)
    display(df.head())
    display(df.describe())

[preprocessing] Starting preprocessing...
Initial shape of the dataset: (110527, 17)
Initial columns in the dataset: Index(['PatientId', 'AppointmentID', 'Gender', 'ScheduledDay',
       'AppointmentDay', 'Age', 'Neighbourhood', 'Scholarship', 'Hypertension',
       'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received', 'No-show',
       'PatientNotes', 'PatientSentiment', 'NoShowReason'],
      dtype='object')
Dropping unnecessary columns...
Remaining columns: Index(['Gender', 'ScheduledDay', 'AppointmentDay', 'Age', 'Neighbourhood',
       'Scholarship', 'Hypertension', 'Diabetes', 'Alcoholism', 'Handcap',
       'SMS_received', 'No-show', 'PatientNotes', 'PatientSentiment',
       'NoShowReason'],
      dtype='object')
Converting date columns to datetime...
Handling missing values...
Adding emotional state columns...
Emotional state columns added: ['anxiety', 'stress', 'confusion', 'hopeful', 'fear']
Final shape of the dataset: (110527, 21)
Final columns in the dataset: Index(['Gender', 'ScheduledDay', 'AppointmentDay', 'Age', 'Neighbourhood',
       'Scholarship', 'Hypertension', 'Diabetes', 'Alcoholism', 'Handcap',
       'SMS_received', 'No-show', 'PatientNotes', 'PatientSentiment',
       'NoShowReason', 'WaitDays', 'anxiety', 'stress', 'confusion', 'hopeful',
       'fear'],
      dtype='object')
[preprocessing] Preprocessing complete.

'shape:'

(110527, 21)

'columns:'

Index(['Gender', 'ScheduledDay', 'AppointmentDay', 'Age', 'Neighbourhood',
       'Scholarship', 'Hypertension', 'Diabetes', 'Alcoholism', 'Handcap',
       'SMS_received', 'No-show', 'PatientNotes', 'PatientSentiment',
       'NoShowReason', 'WaitDays', 'anxiety', 'stress', 'confusion', 'hopeful',
       'fear'],
      dtype='object')

if is_step_enabled('eda'):
    # Distribution of Age - Using class-based approach
    plotter.plot_histplot(
        data=df,
        column='Age',
        bins=30,
        kde=True,
        title='Age Distribution',
        xlabel='Age',
        ylabel='Frequency',
        figsize=(10, 6)
    )

    # Countplot of No-show vs Show
    plotter.plot_countplot(
        data=df,
        column='No-show',
        title='Count of No-show vs Show',
        xlabel='No-show',
        ylabel='Count',
        figsize=(8, 5)
    )

    # Correlation heatmap 
    numeric_df = df.select_dtypes(include=[np.number])
    correlation_matrix = numeric_df.corr()
    plotter.plot_heatmap(
        data=correlation_matrix,
        title='Correlation Heatmap',
        fmt='.2f',
        cmap='coolwarm',
        square=True,
        figsize=(12, 8)
    )

if is_step_enabled('eda'):
    # Plot emotional states as a bar plot - Using class method
    plotter.plot_emotional_states_bar(df)

    # Plot word clouds for PatientSentiment, PatientNotes, and NoShowReason
    plotter.plot_text_wordcloud_custom_stopwords(df['PatientSentiment'], title='Patient Sentiment Word Cloud')
    plotter.plot_text_wordcloud_custom_stopwords(df['PatientNotes'], title='Patient Notes Word Cloud')
    plotter.plot_text_wordcloud_custom_stopwords(df['NoShowReason'], title='No-Show Reason Word Cloud')

Total unique words after filtering: 161
Top 10 most frequent words: {'confusion': 77779, 'anxiety': 62099, 'fear': 57339, 'stress': 55610, 'feels': 54774, 'health': 50047, 'blood': 35628, 'expresses': 34563, 'missed': 33931, 'alcohol': 29663}

Total unique words after filtering: 480
Top 10 most frequent words: {'routine': 75106, 'hypertension': 69899, 'health': 62876, 'provided': 61365, 'discussed': 56894, 'support': 54154, 'importance': 45468, 'strategies': 43443, 'addressed': 43435, 'prevention': 43424}

Total unique words after filtering: 477
Top 10 most frequent words: {'health': 35477, 'follow': 21074, 'including': 19948, 'attend': 15422, 'regular': 15302, 'family': 13660, 'recent': 13186, 'healthcare': 11945, 'especially': 11621, 'scheduled': 11592}

if is_step_enabled('supervised_prediction'):
    # Feature selection for modeling
    selected_features = [
        'WaitDays', 'SMS_received', 'Age', 'Alcoholism', 
        'Scholarship', 'Gender', 'Diabetes', 'Handcap'
    ]

    X = df[selected_features].copy()
    y = df['No-show']

    print(f"\nFeatures shape: {X.shape}")
    print(f"Target distribution: {y.value_counts().to_dict()}")
    print(f"No-show rate: {y.mean():.1%}")

Features shape: (110527, 8)
Target distribution: {0: 88208, 1: 22319}
No-show rate: 20.2%

if is_step_enabled('supervised_prediction'):
    
    # Initialize model
    no_show_model = NoShowPredictionModel(df, selected_features, 'No-show', plotter=plotter)

    # Split data
    no_show_model.split_data()

    # Analyze class imbalance
    class_distribution, class_weight_dict, scale_pos_weight = no_show_model.analyze_class_imbalance()
    print("CLASS IMBALANCE ANALYSIS")
    print("=" * 40)
    print(f"Training samples: {len(no_show_model.X_train):,}")
    print(f"Test samples: {len(no_show_model.X_test):,}")
    print(f"No-show rate: {class_distribution[1]:.1%}")
    class_ratio = (no_show_model.y_train == 0).sum() / (no_show_model.y_train == 1).sum()
    print(f"Class ratio (Show:No-show): {class_ratio:.1f}:1")
    print(f"Imbalance severity: {'High' if class_ratio > 3 else 'Moderate'}")
    print(f"\nClass weights: {class_weight_dict}")
    print(f"XGBoost scale_pos_weight: {scale_pos_weight:.2f}")

CLASS IMBALANCE ANALYSIS
========================================
Training samples: 88,421
Test samples: 22,106
No-show rate: 20.2%
Class ratio (Show:No-show): 4.0:1
Imbalance severity: High

Class weights: {0: 0.6265127681886461, 1: 2.4760851302156257}
XGBoost scale_pos_weight: 0.25

if is_step_enabled('supervised_prediction'):
    # Train baseline models
    base_summary = no_show_model.train_baseline_models()
    display(base_summary.round(3))

if is_step_enabled('supervised_prediction'):
    # Plot baseline model performance (F1, Precision, Accuracy, ROC_AUC) and confusion matrix
    plotter.plot_prediction_model_performance(
        base_summary,
        show_tuning_impact=False,
        X_test=no_show_model.X_test,
        y_test=no_show_model.y_test,
        models_dict=no_show_model.base_models
    )

    # Plot feature importances for the best baseline model
    best_base_model_name = base_summary.index[0]
    best_base_model = no_show_model.base_models[best_base_model_name]
    plotter.plot_prediction_feature_importances(best_base_model, no_show_model.features)

<Figure size 640x480 with 0 Axes>

# HYPERPARAMETERS = {
#     'logistic_regression': {
#         'C': [0.01, 0.1, 0.5, 1, 2, 5, 10, 20],
#         'solver': ['liblinear', 'lbfgs'],
#         'penalty': ['l1', 'l2'],
#         'class_weight': ['balanced', {0: 1, 1: 2}, {0: 1, 1: 3}, {0: 1, 1: 4}],
#         'max_iter': [1000, 2000]
#     },
#     'lr_n_iter': 20,
#     'random_forest': {
#         'bootstrap': [True, False],
#         'n_estimators': [100, 200, 300],
#         'max_depth': [3, 5, 7, 10, None],
#         'min_samples_split': [2, 5, 10, 20],
#         'min_samples_leaf': [1, 2, 4, 8],
#         'max_features': ['sqrt', 'log2', 0.5, 0.7],
#         'class_weight': ['balanced', 'balanced_subsample', {0: 1, 1: 2}, {0: 1, 1: 3}]
#     },
#     'rf_n_iter': 20,
#     'xgboost': {
#         'n_estimators': [50, 100, 200],
#         'max_depth': [3, 4, 5, 6],
#         'learning_rate': [0.01, 0.05, 0.1, 0.15],
#         'subsample': [0.7, 0.8, 0.9, 1.0],
#         'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
#         'reg_alpha': [0, 0.01, 0.1],
#         'reg_lambda': [0.5, 1, 1.5],
#         'gamma': [0, 0.1, 0.5]
#     },
#     'xgb_n_iter': 20
# }

if is_step_enabled('supervised_prediction'):
    lr_random, rf_random, xgb_fine = no_show_model.tune_hyperparameters()
    print("Hyperparameter tuning completed.")

    # After tuning, evaluate all models again
    results_df = no_show_model.evaluate_models()
    display(results_df.round(3))

Starting hyperparameter tuning for Logistic Regression...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Logistic Regression F1: 0.408
Best Logistic Regression params: {'solver': 'liblinear', 'penalty': 'l1', 'max_iter': 1000, 'class_weight': {0: 1, 1: 4}, 'C': 0.01}
Starting hyperparameter tuning for Random Forest...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Logistic Regression F1: 0.408
Best Logistic Regression params: {'solver': 'liblinear', 'penalty': 'l1', 'max_iter': 1000, 'class_weight': {0: 1, 1: 4}, 'C': 0.01}
Starting hyperparameter tuning for Random Forest...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Random Forest F1: 0.444
Best Random Forest params: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 0.7, 'max_depth': 7, 'class_weight': 'balanced_subsample', 'bootstrap': False}
Starting hyperparameter tuning for XGBoost...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Random Forest F1: 0.444
Best Random Forest params: {'n_estimators': 100, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 0.7, 'max_depth': 7, 'class_weight': 'balanced_subsample', 'bootstrap': False}
Starting hyperparameter tuning for XGBoost...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best XGBoost F1: 0.067
Best XGBoost params: {'subsample': 0.7, 'reg_lambda': 0.5, 'reg_alpha': 0.01, 'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.15, 'gamma': 0, 'colsample_bytree': 0.9}
Hyperparameter tuning completed.
Best XGBoost F1: 0.067
Best XGBoost params: {'subsample': 0.7, 'reg_lambda': 0.5, 'reg_alpha': 0.01, 'n_estimators': 200, 'max_depth': 5, 'learning_rate': 0.15, 'gamma': 0, 'colsample_bytree': 0.9}
Hyperparameter tuning completed.

if is_step_enabled('supervised_prediction'):
    # Evaluate the optimized model
    print(f"Best Model: {no_show_model.best_model_name}")

    # Plot tuned model performance (F1, Precision, Accuracy, ROC_AUC) and confusion matrix
    plotter.plot_prediction_model_performance(
        results_df,
        show_tuning_impact=True,
        X_test=no_show_model.X_test,
        y_test=no_show_model.y_test,
        models_dict={**no_show_model.base_models, **no_show_model.tuned_models}
    )

    # Plot feature importances for the best model after tuning
    best_model = {**no_show_model.base_models, **no_show_model.tuned_models}[no_show_model.best_model_name]
    plotter.plot_prediction_feature_importances(best_model, no_show_model.features)

Best Model: Random Forest (Tuned)

if is_step_enabled('supervised_prediction'):
    # SMOTE + Threshold Optimization
    # Find best performing model from tuning
    scores = {
        'LR': lr_random.best_score_,
        'RF': rf_random.best_score_,
        'XGB': xgb_fine.best_score_
    }
    best_model_type = max(scores, key=scores.get)
    if best_model_type == 'XGB':
        best_base_model = xgb_fine.best_estimator_
    elif best_model_type == 'RF':
        best_base_model = rf_random.best_estimator_
    else:
        best_base_model = lr_random.best_estimator_

    optimized_smote_model, threshold_df = no_show_model.smote_threshold_optimization(best_base_model)
    print("SMOTE + Threshold optimization completed.")

Starting SMOTE + Threshold Optimization...
Threshold: 0.10 | F1: 0.347 | Precision: 0.210 | Recall: 0.994
Threshold: 0.15 | F1: 0.403 | Precision: 0.255 | Recall: 0.965
Threshold: 0.20 | F1: 0.423 | Precision: 0.272 | Recall: 0.955
Threshold: 0.25 | F1: 0.431 | Precision: 0.279 | Recall: 0.948
Threshold: 0.30 | F1: 0.435 | Precision: 0.283 | Recall: 0.940
Threshold: 0.35 | F1: 0.435 | Precision: 0.284 | Recall: 0.928
Threshold: 0.40 | F1: 0.440 | Precision: 0.291 | Recall: 0.897
Threshold: 0.45 | F1: 0.438 | Precision: 0.294 | Recall: 0.857
Threshold: 0.50 | F1: 0.436 | Precision: 0.296 | Recall: 0.822
Threshold: 0.55 | F1: 0.433 | Precision: 0.313 | Recall: 0.701
Threshold: 0.60 | F1: 0.400 | Precision: 0.330 | Recall: 0.507
Threshold: 0.65 | F1: 0.349 | Precision: 0.350 | Recall: 0.348
Threshold: 0.10 | F1: 0.347 | Precision: 0.210 | Recall: 0.994
Threshold: 0.15 | F1: 0.403 | Precision: 0.255 | Recall: 0.965
Threshold: 0.20 | F1: 0.423 | Precision: 0.272 | Recall: 0.955
Threshold: 0.25 | F1: 0.431 | Precision: 0.279 | Recall: 0.948
Threshold: 0.30 | F1: 0.435 | Precision: 0.283 | Recall: 0.940
Threshold: 0.35 | F1: 0.435 | Precision: 0.284 | Recall: 0.928
Threshold: 0.40 | F1: 0.440 | Precision: 0.291 | Recall: 0.897
Threshold: 0.45 | F1: 0.438 | Precision: 0.294 | Recall: 0.857
Threshold: 0.50 | F1: 0.436 | Precision: 0.296 | Recall: 0.822
Threshold: 0.55 | F1: 0.433 | Precision: 0.313 | Recall: 0.701
Threshold: 0.60 | F1: 0.400 | Precision: 0.330 | Recall: 0.507
Threshold: 0.65 | F1: 0.349 | Precision: 0.350 | Recall: 0.348
Threshold: 0.70 | F1: 0.237 | Precision: 0.397 | Recall: 0.169
Threshold: 0.75 | F1: 0.164 | Precision: 0.385 | Recall: 0.104
Threshold: 0.80 | F1: 0.000 | Precision: 0.000 | Recall: 0.000
Threshold: 0.85 | F1: 0.000 | Precision: 0.000 | Recall: 0.000
Optimal threshold: 0.40
Optimal F1 score: 0.440
SMOTE + Threshold optimization completed.
Threshold: 0.70 | F1: 0.237 | Precision: 0.397 | Recall: 0.169
Threshold: 0.75 | F1: 0.164 | Precision: 0.385 | Recall: 0.104
Threshold: 0.80 | F1: 0.000 | Precision: 0.000 | Recall: 0.000
Threshold: 0.85 | F1: 0.000 | Precision: 0.000 | Recall: 0.000
Optimal threshold: 0.40
Optimal F1 score: 0.440
SMOTE + Threshold optimization completed.

if is_step_enabled('supervised_prediction'):
    # Performance comparison visualization
    no_show_model.plot_performance(show_tuning_impact=True)

if is_step_enabled('supervised_prediction'):
    metrics = no_show_model.evaluate_model_metrics()
    display(metrics)

{'total_patients': 22106,
 'actual_no_shows': 4464,
 'precision': 0.30636174636174635,
 'recall': 0.8252688172043011,
 'f1': 0.446843350112196,
 'predicted_no_shows': 14571,
 'true_positives': 4463,
 'prevented_no_shows': 1115,
 'baseline_cost': 669600,
 'intervention_cost_total': 364275,
 'prevented_cost': 167250,
 'net_benefit': -197025,
 'roi': -54.08688490837966}

if is_step_enabled('supervised_prediction'):
    # Test predictions on the test set and display a sample
    if hasattr(no_show_model, 'X_test') and hasattr(no_show_model, 'y_test') and hasattr(no_show_model, 'best_model'):
        y_pred = no_show_model.best_model.predict(no_show_model.X_test)
        results_df = no_show_model.X_test.copy()
        results_df['Actual'] = no_show_model.y_test.values
        results_df['Predicted'] = y_pred
        display(results_df.head(10))
        print('Prediction test completed. Showing first 10 predictions.')
    else:
        print('Test set or best model not available for prediction test.')

Prediction test completed. Showing first 10 predictions.

if is_step_enabled('supervised_prediction'):
    # Export the best model
    if hasattr(no_show_model, 'best_model'):
        export_path = config.PREDICTION_MODEL_EXPORT_PATH
        # If export_path is a directory, append a filename
        if os.path.isdir(export_path):
            export_path = os.path.join(export_path, "best_model.pkl")
        else:
            # If the parent directory does not exist, create it
            parent_dir = os.path.dirname(export_path)
            if parent_dir and not os.path.exists(parent_dir):
                os.makedirs(parent_dir, exist_ok=True)
            if export_path and not os.path.exists(export_path):
                os.makedirs(export_path, exist_ok=True)
                export_path = os.path.join(export_path, "best_model.pkl")
        joblib.dump(no_show_model.best_model, export_path)
        print(f'Best model exported to: {export_path}')
    else:
        print('No best model found to export.')

Best model exported to: d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\models\supervised\prediction\best_model.pkl

if is_step_enabled('unsupervised_clustering'):
    clustering = ClusteringAnalysis(df)
    clustering.add_emotional_distress()
    clustering.standardize()
    clustering.run_pca(n_components=None)

if is_step_enabled('unsupervised_clustering'):
    pca_columns = clustering.numeric_df.columns.tolist()
    for i in range(0, len(pca_columns), 3):
        cols = pca_columns[i:i+3]
        plotter.plot_pca_3d_colored_by_features(clustering.X_pca, clustering.numeric_df, cols)

if is_step_enabled('unsupervised_clustering'):
    target_variance = 0.90
    top_n, X_reduced = clustering.select_top_n_components(target_variance=target_variance)
    print(f"Number of components to retain ≥ {target_variance*100:.0f}% variance: {top_n}")
    explained_df = clustering.explained_variance()
    plotter.plot_pca_explained_variance(explained_df)

Number of components to retain ≥ 90% variance: 12

if is_step_enabled('unsupervised_clustering'):
    feature_contributions = clustering.get_feature_contributions()
    print("Feature contributions to PCA components:")
    print(feature_contributions)
    loadings = clustering.get_loadings()
    plotter.plot_pca_biplot(clustering.X_pca, loadings, clustering.numeric_df.columns)

Feature contributions to PCA components:
                             PC1           PC2           PC3           PC4  \
Gender             -5.837364e-03  1.196770e-01 -2.650962e-03  2.812564e-01   
Age                -2.922519e-02  4.251152e-01 -1.368067e-01  4.240844e-01   
Scholarship        -1.874359e-01  2.514545e-01  1.135061e-01  2.408097e-01   
Hypertension       -4.289357e-01 -1.430078e-02  6.128584e-01  4.848147e-02   
Diabetes            4.684101e-01  2.633773e-02  2.858862e-01  5.194370e-02   
Alcoholism         -1.679672e-01 -5.768804e-02  2.836091e-01 -2.583521e-02   
Handcap            -2.703040e-01 -1.160922e-02  2.147822e-01  1.677240e-02   
SMS_received        5.402639e-01 -2.459760e-01  9.289261e-02  6.026642e-02   
No-show             2.195234e-01 -9.583449e-03  1.535400e-01  5.151367e-02   
WaitDays            2.360314e-01 -4.218770e-02  5.409839e-01  5.497996e-02   
anxiety            -1.762699e-01 -2.666693e-01 -9.259017e-02 -1.622846e-01   
stress              4.919207e-02  2.491538e-01  6.840836e-02 -5.300167e-02   
confusion           1.617717e-01  5.022303e-01  1.417780e-01  4.540366e-02   
hopeful            -5.884194e-02 -5.009183e-01  8.553286e-02  3.387939e-01   
fear               -1.996698e-02 -1.965803e-01 -1.377125e-01  7.223322e-01   
Emotional_Distress  3.844967e-17  6.255467e-17  1.177523e-16 -1.325946e-17   

                             PC5           PC6           PC7           PC8  \
Gender              2.134812e-01  7.807768e-02  1.169441e-03 -4.182740e-02   
Age                 3.169267e-01  5.984165e-02  1.291334e-01 -2.029923e-01   
Scholarship         2.024550e-01 -3.040014e-02 -1.243534e-02  5.427730e-01   
Hypertension        7.266704e-02 -8.438341e-02 -1.020836e-01 -1.919493e-01   
Diabetes           -5.432969e-03  7.794893e-01  1.033778e-01  2.634501e-02   
Alcoholism         -2.129636e-02 -9.466927e-02  8.309267e-01 -7.499322e-02   
Handcap            -3.281956e-02  2.227482e-01 -4.592875e-01 -2.248192e-01   
SMS_received        3.229407e-01 -3.771297e-01 -7.059994e-03 -1.291102e-01   
No-show             1.192008e-01 -3.120903e-01 -2.157472e-01 -1.058301e-03   
WaitDays            2.508276e-02 -1.151146e-01 -6.542622e-02 -4.200629e-02   
anxiety             7.856117e-01  1.689152e-01  8.516008e-03  1.221660e-01   
stress              1.639190e-01 -3.217790e-03 -8.208680e-02 -3.638676e-02   
confusion          -1.030195e-01 -1.805535e-01 -3.420521e-02  3.619571e-01   
hopeful            -1.329567e-01  4.009817e-02 -4.215159e-02  5.685171e-01   
fear               -1.211621e-01 -2.160880e-02  6.442436e-03 -2.825379e-01   
Emotional_Distress  2.480239e-16  1.209223e-16  1.397364e-16 -2.336754e-16   

                             PC9          PC10      PC11      PC12  \
Gender             -2.114255e-02 -4.189637e-02  0.357575  0.356088   
Age                -1.088313e-01 -1.268487e-01 -0.346938 -0.259728   
Scholarship         2.150536e-01  4.457632e-01 -0.029377 -0.168567   
Hypertension       -2.920192e-01 -4.109716e-01  0.004984 -0.123023   
Diabetes            3.548543e-03 -1.084770e-01 -0.048828  0.033457   
Alcoholism          3.528206e-01  3.669021e-02  0.023350  0.135824   
Handcap             6.944068e-01  6.151578e-02 -0.104534  0.053002   
SMS_received        3.648491e-01 -2.041364e-01  0.013887 -0.306378   
No-show            -5.858319e-03  1.266374e-02 -0.181105  0.477818   
WaitDays           -3.102966e-01  5.550358e-01 -0.047040 -0.012788   
anxiety            -7.545962e-02 -2.897839e-02 -0.065791  0.225610   
stress              3.529641e-02 -1.114575e-02  0.731644 -0.327285   
confusion           1.130039e-01 -4.033983e-01 -0.070987  0.279660   
hopeful            -1.837883e-02 -2.730736e-01  0.057532 -0.151360   
fear               -2.977789e-03  9.743676e-02  0.112411  0.152057   
Emotional_Distress  4.783145e-17 -3.414115e-17 -0.375777 -0.367721   

                            PC13          PC14      PC15      PC16  
Gender              7.733185e-02 -1.895983e-01  0.471776  0.575977  
Age                 3.913380e-01 -1.341778e-01  0.095273 -0.248936  
Scholarship        -2.391789e-01  3.696282e-01  0.160397 -0.017514  
Hypertension       -2.075549e-01  2.322649e-01  0.108737 -0.003848  
Diabetes           -6.594890e-02  2.364688e-01 -0.023187 -0.019167  
Alcoholism          1.547308e-01 -1.024315e-02 -0.111838  0.022410  
Handcap             1.401942e-01 -2.145590e-01 -0.032525 -0.041670  
SMS_received       -2.219283e-01  5.233089e-03  0.230610 -0.028386  
No-show             4.579666e-01  5.101516e-01 -0.182586  0.051987  
WaitDays            5.375094e-02 -4.593302e-01 -0.055853 -0.056386  
anxiety            -1.473045e-01 -1.628967e-01 -0.295996 -0.067693  
stress              2.561858e-01  7.975534e-02 -0.421051 -0.002526  
confusion          -2.701598e-01 -3.368572e-01 -0.278478 -0.035756  
hopeful             3.813475e-01 -1.749224e-01 -0.002539 -0.045723  
fear               -3.535912e-01  7.201160e-02 -0.382648 -0.057622  
Emotional_Distress -2.035829e-16  1.586268e-16 -0.371732  0.765107

if is_step_enabled('unsupervised_clustering'):
    wcss, optimal_k = clustering.elbow_method(k_range=range(1, 15))
    plotter.plot_elbow_curve(list(range(1, 15)), wcss, optimal_k)

if is_step_enabled('unsupervised_clustering'):
    k_values = range(2, 8)

    kmeans_scores, gmm_scores, kmeans_labels_list, gmm_labels_list = clustering.evaluate_clustering_performance(k_values)
    for k in k_values:
        kmeans_labels = kmeans_labels_list[k - k_values.start]
        gmm_labels = gmm_labels_list[k - k_values.start]
        plotter.plot_clustering_3d_side_by_side(clustering.X_reduced, kmeans_labels, gmm_labels, k)

if is_step_enabled('unsupervised_clustering'):
    print('KMeans Scores:')
    display(kmeans_scores)
    print('GMM Scores:')
    display(gmm_scores)
    plotter.plot_clustering_scores(kmeans_scores, gmm_scores)

KMeans Scores:

GMM Scores:

if is_step_enabled('nlp_sentiment_analysis'):
    # First, let's check what columns are available in our DataFrame
    print("Available columns:", df.columns.tolist())
    print("DataFrame shape:", df.shape)
    
    # Check if emotion columns exist, if not create them from PatientSentiment text
    emotion_columns_exist = all(col in df.columns for col in EMOTION_STATES)
    print(f"Emotion columns exist: {emotion_columns_exist}")
    
    if not emotion_columns_exist:
        print("Creating emotion columns from PatientSentiment text...")
        # Create emotion columns by checking if emotion words appear in PatientSentiment
        for emotion in EMOTION_STATES:
            df[emotion] = df['PatientSentiment'].str.lower().str.contains(emotion, na=False).astype(int)
        print("Emotion columns created successfully!")
    
    # Now create our sentiment analysis DataFrame
    features = ['PatientSentiment', 'No-show'] + EMOTION_STATES
    available_features = [col for col in features if col in df.columns]
    print(f"Using features: {available_features}")
    
    sa_df = df[available_features].dropna()
    print("Sentiment Analysis DataFrame shape:", sa_df.shape)
    print("Sentiment Analysis DataFrame columns:", sa_df.columns.tolist())
    print("Sample emotion distribution:")
    for emotion in EMOTION_STATES:
        if emotion in sa_df.columns:
            print(f"  {emotion}: {sa_df[emotion].sum()} positive cases out of {len(sa_df)}")

Available columns: ['Gender', 'ScheduledDay', 'AppointmentDay', 'Age', 'Neighbourhood', 'Scholarship', 'Hypertension', 'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received', 'No-show', 'PatientNotes', 'PatientSentiment', 'NoShowReason', 'WaitDays', 'anxiety', 'stress', 'confusion', 'hopeful', 'fear']
DataFrame shape: (110527, 21)
Emotion columns exist: True
Using features: ['PatientSentiment', 'No-show', 'anxiety', 'stress', 'confusion', 'hopeful', 'fear']
Sentiment Analysis DataFrame shape: (110527, 7)
Sentiment Analysis DataFrame columns: ['PatientSentiment', 'No-show', 'anxiety', 'stress', 'confusion', 'hopeful', 'fear']
Sample emotion distribution:
  anxiety: 54377 positive cases out of 110527
  stress: 66678 positive cases out of 110527
  confusion: 58866 positive cases out of 110527
  hopeful: 11753 positive cases out of 110527
  fear: 47130 positive cases out of 110527

if is_step_enabled('nlp_sentiment_analysis'):
   # Initialize the improved sentiment analysis model
    print("\nInitializing improved sentiment analysis model...")
    sa_model = SentimentAnalysisModel(sa_df, emotional_states=EMOTION_STATES, device=NLP_CONFIG['device'])

    # Train the model with improved anti-overfitting techniques
    print("Training model with improved regularization...")
    sa_model.train(epochs=5, patience=3)  # More epochs but better patience for proper training

Initializing improved sentiment analysis model...

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

Training model with improved regularization...
Epoch 1/5

Training: 100%|██████████| 4698/4698 [05:36<00:00, 13.97it/s, loss=0.366]
Validating: 100%|██████████| 415/415 [00:15<00:00, 25.98it/s]

Epoch 1: Train Loss: 0.5625, Val Loss: 0.3612
Epoch 2/5

Training: 100%|██████████| 4698/4698 [04:03<00:00, 19.31it/s, loss=0.317]
Validating: 100%|██████████| 415/415 [00:15<00:00, 26.39it/s]

Epoch 2: Train Loss: 0.3471, Val Loss: 0.2367
Epoch 3/5

Training: 100%|██████████| 4698/4698 [04:13<00:00, 18.53it/s, loss=0.319]
Validating: 100%|██████████| 415/415 [00:14<00:00, 28.55it/s]

Epoch 3: Train Loss: 0.2972, Val Loss: 0.1981
Epoch 4/5

Training: 100%|██████████| 4698/4698 [05:07<00:00, 15.30it/s, loss=0.235]
Validating: 100%|██████████| 415/415 [00:16<00:00, 25.45it/s]

if is_step_enabled('nlp_sentiment_analysis'):
    # Evaluate the model with threshold tuning
    print("Evaluating model with optimized thresholds...")
    predictions, actual_labels = sa_model.evaluate()

    # Get metrics
    sentiment_analysis_metrics = sa_model.report(predictions, actual_labels)
    print("Training completed successfully!")

Evaluating model with optimized thresholds...

Evaluating: 100%|██████████| 691/691 [00:26<00:00, 26.35it/s]

Optimal threshold for anxiety: 0.55, F1: 0.962
Optimal threshold for stress: 0.45, F1: 0.960
Optimal threshold for confusion: 0.45, F1: 0.955
Optimal threshold for hopeful: 0.20, F1: 0.934
Optimal threshold for fear: 0.50, F1: 0.955
Training completed successfully!

if is_step_enabled('nlp_sentiment_analysis'):
    # Print metrics in a readable format
    plotter.print_sentiment_metrics(sentiment_analysis_metrics)

    # Plot accuracy by emotion with overall accuracy line
    plotter.plot_accuracy_by_emotion(sentiment_analysis_metrics)

    # Plot confusion matrices for each emotion
    plotter.plot_confusion_matrices(actual_labels, predictions, sa_model.emotional_states)

    # Plot training and validation loss
    sa_stats = sa_model.get_training_stats()
    plotter.plot_training_validation_loss(sa_stats['training_losses'], sa_stats['validation_losses'])

    # Plot time taken per epoch
    plotter.plot_epoch_times(sa_stats['epoch_times'])

Model Accuracy by Emotion:
  anxiety: 0.9642
  stress: 0.9536
  confusion: 0.9533
  hopeful: 0.9870
  fear: 0.9633

Overall Accuracy: 0.9643

Classification Reports:

Anxiety:
  Not Present: {'precision': 0.9339512358049432, 'recall': 1.0, 'f1-score': 0.9658477613229135, 'support': 11185.0}
  Present: {'precision': 1.0, 'recall': 0.9275707352806519, 'f1-score': 0.9624245879055627, 'support': 10921.0}
  macro avg: {'precision': 0.9669756179024716, 'recall': 0.963785367640326, 'f1-score': 0.9641361746142381, 'support': 22106.0}
  weighted avg: {'precision': 0.9665812255712607, 'recall': 0.9642178594046865, 'f1-score': 0.9641566151684355, 'support': 22106.0}

Stress:
  Not Present: {'precision': 0.8970512157268494, 'recall': 0.9964371911274567, 'f1-score': 0.9441359032995753, 'support': 8701.0}
  Present: {'precision': 0.9975082388875492, 'recall': 0.9257739649384558, 'f1-score': 0.9603033351388996, 'support': 13405.0}
  macro avg: {'precision': 0.9472797273071993, 'recall': 0.9611055780329563, 'f1-score': 0.9522196192192375, 'support': 22106.0}
  weighted avg: {'precision': 0.9579679982957982, 'recall': 0.9535872613770017, 'f1-score': 0.9539397766283613, 'support': 22106.0}

Confusion:
  Not Present: {'precision': 0.9293095564700503, 'recall': 0.9750527729802341, 'f1-score': 0.9516317834901906, 'support': 10422.0}
  Present: {'precision': 0.9767254498254409, 'recall': 0.9338411502909962, 'f1-score': 0.9548020126886896, 'support': 11684.0}
  macro avg: {'precision': 0.9530175031477456, 'recall': 0.9544469616356152, 'f1-score': 0.9532168980894401, 'support': 22106.0}
  weighted avg: {'precision': 0.9543709559979787, 'recall': 0.9532706052655388, 'f1-score': 0.95330739002033, 'support': 22106.0}

Hopeful:
  Not Present: {'precision': 0.9856964864191378, 'recall': 1.0, 'f1-score': 0.9927967271540797, 'support': 19778.0}
  Present: {'precision': 1.0, 'recall': 0.8767182130584192, 'f1-score': 0.9343099107347219, 'support': 2328.0}
  macro avg: {'precision': 0.9928482432095689, 'recall': 0.9383591065292096, 'f1-score': 0.9635533189444008, 'support': 22106.0}
  weighted avg: {'precision': 0.9872028005246407, 'recall': 0.987017099430019, 'f1-score': 0.9866374351689053, 'support': 22106.0}

Fear:
  Not Present: {'precision': 0.9413295657346817, 'recall': 0.9981864059296641, 'f1-score': 0.9689246077305779, 'support': 12682.0}
  Present: {'precision': 0.9973434973434974, 'recall': 0.9162775891341256, 'f1-score': 0.9550934631124876, 'support': 9424.0}
  macro avg: {'precision': 0.9693365315390896, 'recall': 0.9572319975318948, 'f1-score': 0.9620090354215327, 'support': 22106.0}
  weighted avg: {'precision': 0.965208842468667, 'recall': 0.9632678910702976, 'f1-score': 0.9630282580119096, 'support': 22106.0}

# HYPERPARAMETERS = {
#     'tinybert': [
#         {'learning_rate': 5e-5, 'batch_size': 16, 'epochs': 2, 'patience': 1, 'accumulation_steps': 4},
#         {'learning_rate': 1e-4, 'batch_size': 16, 'epochs': 2, 'patience': 1, 'accumulation_steps': 4},
#         {'learning_rate': 3e-5, 'batch_size': 8, 'epochs': 6, 'patience': 2, 'accumulation_steps': 2},
#         {'learning_rate': 2e-5, 'batch_size': 32, 'epochs': 4, 'patience': 2, 'accumulation_steps': 4},
#         {'learning_rate': 1e-5, 'batch_size': 16, 'epochs': 8, 'patience': 3, 'accumulation_steps': 2},
#     ]
# }

if is_step_enabled('nlp_sentiment_analysis'):
    # Prepare data splits for hyperparameter tuning
    X = df['PatientSentiment'].values
    y = df[EMOTION_STATES].values

    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=NLP_CONFIG['epochs'])
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.15, random_state=NLP_CONFIG['epochs'])

    # Run hyperparameter tuning using the class method
    results = SentimentAnalysisModel.run_hyperparameter_tuning(
        X_train, y_train, X_val, y_val, X_test, y_test,
        emotional_states=EMOTION_STATES,
        device=NLP_CONFIG['device'],
        tokenizer=sa_model.tokenizer,
        max_seq_length=NLP_CONFIG['max_length']
    )

--- Hyperparameter Configuration 1/2 ---
Learning Rate: 5e-05
Batch Size: 16
Max Epochs: 2
Early Stopping Patience: 1

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 4698/4698 [05:08<00:00, 15.25it/s, loss=0.213]
Validating Epoch 1: 100%|██████████| 415/415 [00:18<00:00, 22.05it/s]

Epoch 1: Train Loss: 0.3582, Val Loss: 0.0007

Training Epoch 2: 100%|██████████| 4698/4698 [05:33<00:00, 14.10it/s, loss=0.354]
Validating Epoch 2: 100%|██████████| 415/415 [00:16<00:00, 25.50it/s]

Epoch 2: Train Loss: 0.2659, Val Loss: 0.0007
Early stopping counter: 1/1
Early stopping triggered after 2 epochs

Evaluating: 100%|██████████| 691/691 [00:26<00:00, 25.72it/s]

Overall Accuracy: 0.9614
Training Time: 676.38 seconds

--- Hyperparameter Configuration 2/2 ---
Learning Rate: 0.0001
Batch Size: 16
Max Epochs: 2
Early Stopping Patience: 1

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 4698/4698 [05:41<00:00, 13.74it/s, loss=0.188]
Validating Epoch 1: 100%|██████████| 415/415 [00:14<00:00, 27.95it/s]

Epoch 1: Train Loss: 0.2736, Val Loss: 0.0005

Training Epoch 2: 100%|██████████| 4698/4698 [05:17<00:00, 14.81it/s, loss=0.184]
Validating Epoch 2: 100%|██████████| 415/415 [00:14<00:00, 29.04it/s]

if is_step_enabled('nlp_sentiment_analysis'):
    
    # Print and plot metrics for each configuration
    for i, res in enumerate(results):
        print(f"\n--- Results for Hyperparameter Configuration {i+1} ---")
        # Compute metrics for each configuration
        emotion_accuracies = {emo: accuracy_score(res['actual_labels'][:, idx], res['predictions'][:, idx]) for idx, emo in enumerate(EMOTION_STATES)}
        sentiment_analysis_metrics = {
            'emotion_accuracies': emotion_accuracies,
            'overall_accuracy': res['accuracy'],
            'classification_reports': {}  # Optionally fill with classification_report if needed
        }
        plotter.print_sentiment_metrics(sentiment_analysis_metrics)
        plotter.plot_accuracy_by_emotion(sentiment_analysis_metrics)
        plotter.plot_confusion_matrices(res['actual_labels'], res['predictions'], EMOTION_STATES)
        plotter.plot_training_validation_loss(res['train_losses'], res['val_losses'])
        plotter.plot_epoch_times(res['epoch_times'])

--- Results for Hyperparameter Configuration 1 ---
Model Accuracy by Emotion:
  anxiety: 0.9627
  stress: 0.9488
  confusion: 0.9525
  hopeful: 0.9862
  fear: 0.9571

Overall Accuracy: 0.9614

Classification Reports:

--- Results for Hyperparameter Configuration 2 ---
Model Accuracy by Emotion:
  anxiety: 0.9626
  stress: 0.9499
  confusion: 0.9526
  hopeful: 0.9877
  fear: 0.9607

Overall Accuracy: 0.9627

Classification Reports:

if is_step_enabled('nlp_sentiment_analysis'):
    # Select the best model based on accuracy and training time using the class method
    best_model, best_params, best_idx, combined_scores = SentimentAnalysisModel.get_best_model_from_results(results)

    print(f"\nBest model configuration (balanced for both accuracy and speed):")
    print(f"Learning Rate: {best_params['learning_rate']}")
    print(f"Batch Size: {best_params['batch_size']}")
    print(f"Epochs: {best_params['epochs']}")
    print(f"Accuracy: {results[best_idx]['accuracy']:.4f}")
    print(f"Training Time: {results[best_idx].get('training_time', sum(results[best_idx]['epoch_times'])):.2f} seconds")
    print(f"Combined Score: {combined_scores[best_idx]:.4f}")

    # Plot ROC and AUC for each emotion using the class-based plotter
    plotter.plot_roc_auc_by_emotion(actual_labels, predictions, EMOTION_STATES)

Best model configuration (balanced for both accuracy and speed):
Learning Rate: 5e-05
Batch Size: 16
Epochs: 2
Accuracy: 0.9614
Training Time: 676.38 seconds
Combined Score: 0.9730

if is_step_enabled('nlp_sentiment_analysis'):
    # Export the best model and tokenizer after hyperparameter tuning
    SentimentAnalysisModel.export_best_model(
        best_model,
        sa_model.tokenizer,
        SENTIMENT_MODEL_EXPORT_PATH_RAW
    )

Best model and tokenizer exported to: d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\models\nlp\sentiment_analysis_raw

if is_step_enabled('nlp_sentiment_analysis'):
    example_text = "Patient (minor) is anxious and fearful about medical procedures, sometimes confused by instructions, and stressed by separation from family."
    expected = {'anxiety': True, 'stress': True, 'confusion': True, 'hopeful': False, 'fear': True}

    raw_pred = SentimentAnalysisModel.predict_emotions_raw(
        example_text,
        sa_model.model,
        sa_model.tokenizer,
        NLP_CONFIG['device']
    )

    print("Example text:")
    print(example_text)
    print("\nEmotion prediction comparison:")
    for emo in expected:
        result = "✅" if raw_pred[emo] == expected[emo] else "❌"
        print(f"{emo}: expected={expected[emo]}, predicted={raw_pred[emo]} {result}")

Example text:
Patient (minor) is anxious and fearful about medical procedures, sometimes confused by instructions, and stressed by separation from family.

Emotion prediction comparison:
anxiety: expected=True, predicted=False ❌
stress: expected=True, predicted=False ❌
confusion: expected=True, predicted=False ❌
hopeful: expected=False, predicted=False ✅
fear: expected=True, predicted=False ❌

if is_step_enabled('nlp_sentiment_analysis'):
    # Run the raw model test
    !pytest -s ../tests/test_sentiment_anlaysis.py -k test_sentiment_model_predictions_raw --maxfail=1 --disable-warnings -q

--- Running test_sentiment_model_predictions_raw ---
❌ Test FAILED for: Patient is hopeful and shows no significant anxiety, stress, or fear related to health conditions.
Prediction: {'anxiety': 1, 'stress': 1, 'confusion': 0, 'hopeful': 0, 'fear': 1}
Expected: ['hopeful']

F
================================== FAILURES ===================================
____________________ test_sentiment_model_predictions_raw _____________________

    def test_sentiment_model_predictions_raw():
        print("\n--- Running test_sentiment_model_predictions_raw ---")
        model, tokenizer, _ = _load_model_and_tokenizer(SENTIMENT_MODEL_EXPORT_PATH_RAW)
        model.model.eval()
        results = []
        for text in TEST_TEXTS:
            encoding = tokenizer.encode_plus(
                text,
                add_special_tokens=True,
                max_length=NLP_CONFIG['max_length'],
                return_token_type_ids=False,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt',
            )
            input_ids = encoding['input_ids'].to(NLP_CONFIG['device'])
            attention_mask = encoding['attention_mask'].to(NLP_CONFIG['device'])
            with torch.no_grad():
                outputs = model.model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs.logits
                probs = torch.sigmoid(logits).cpu().numpy()[0]
                preds = (probs >= 0.5).astype(int)
            results.append(preds)
        for pred in results:
            assert len(pred) == len(EMOTION_STATES)
            assert all((p == 0 or p == 1) for p in pred)
>       passed, total = _print_and_score_results(results, TEST_TEXTS, EXPECTED_EMOTIONS)

..\tests\test_sentiment_anlaysis.py:89: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

results = [array([1, 1, 0, 0, 1]), array([1, 1, 0, 0, 1]), array([0, 1, 1, 0, 1]), array([0, 0, 0, 0, 0]), array([1, 1, 0, 0, 1]), array([0, 0, 0, 0, 0]), ...]
test_texts = ['Patient is hopeful and shows no significant anxiety, stress, or fear related to health conditions.', 'Patient expres... or fear during the appointment.', 'Patient is confused about the medication schedule and expresses frustration.', ...]
expected_emotions = [['hopeful'], ['fear', 'anxiety'], ['fear', 'confusion', 'stress'], ['anxiety', 'fear', 'confusion', 'stress'], [], ['confusion'], ...]

    def _print_and_score_results(results, test_texts, expected_emotions):
        total = len(test_texts)
        passed = 0
        for idx, (text, pred) in enumerate(zip(test_texts, results)):
            pred_dict = {emo: int(val) for emo, val in zip(EMOTION_STATES, pred)}
            test_passed = True
            for emo in expected_emotions[idx]:
                if pred_dict[emo] != 1:
                    test_passed = False
                    print(f"\u274C Test FAILED for: {text}\nPrediction: {pred_dict}\nExpected: {expected_emotions[idx]}\n")
>                   assert False, f"Expected emotion '{emo}' to be present in: {text} (got {pred_dict})"
E                   AssertionError: Expected emotion 'hopeful' to be present in: Patient is hopeful and shows no significant anxiety, stress, or fear related to health conditions. (got {'anxiety': 1, 'stress': 1, 'confusion': 0, 'hopeful': 0, 'fear': 1})
E                   assert False

..\tests\test_sentiment_anlaysis.py:56: AssertionError
=========================== short test summary info ===========================
FAILED ..\tests\test_sentiment_anlaysis.py::test_sentiment_model_predictions_raw - AssertionError: Expected emotion 'hopeful' to be present in: Patient is hop...
!!!!!!!!!!!!!!!!!!!!!!!!!! stopping after 1 failures !!!!!!!!!!!!!!!!!!!!!!!!!!
1 failed, 1 deselected in 35.87s

if is_step_enabled('nlp_sentiment_analysis'):
    example_text = "Patient (minor) is anxious and fearful about medical procedures, sometimes confused by instructions, and stressed by separation from family."
    expected = {'anxiety': True, 'stress': True, 'confusion': True, 'hopeful': False, 'fear': True}

    post_processed = SentimentAnalysisModel.predict_emotions(
        example_text,
        sa_model.model,
        sa_model.tokenizer,
        NLP_CONFIG['device'],
        emotion_variations_path=EMOTION_VARIATIONS_PATH,
        negation_patterns_path=NEGATION_PATTERNS_PATH
    )
    print("Post-processed emotion prediction:", post_processed)
    print("Example text:")
    print(example_text)
    print("\nEmotion prediction comparison:")
    for emo in expected:
        result = "✅" if post_processed[emo] == expected[emo] else "❌"
        print(f"{emo}: expected={expected[emo]}, predicted={post_processed[emo]} {result}")

Post-processed emotion prediction: {'anxiety': True, 'stress': True, 'confusion': True, 'hopeful': False, 'fear': True}
Example text:
Patient (minor) is anxious and fearful about medical procedures, sometimes confused by instructions, and stressed by separation from family.

Emotion prediction comparison:
anxiety: expected=True, predicted=True ✅
stress: expected=True, predicted=True ✅
confusion: expected=True, predicted=True ✅
hopeful: expected=False, predicted=False ✅
fear: expected=True, predicted=True ✅

if is_step_enabled('nlp_sentiment_analysis'):
    # Evaluate the model with post-processing on the test set
    results_post = SentimentAnalysisModel.evaluate_model_with_post_processing(
        sa_model.model,
        sa_model.test_loader,
        sa_model.tokenizer,
        NLP_CONFIG['device'],
        emotion_variations_path=EMOTION_VARIATIONS_PATH,
        negation_patterns_path=NEGATION_PATTERNS_PATH
    )
    print("\nPost-processing overall accuracy:", results_post['accuracy'])
    print("Emotion-wise accuracies:", results_post['emotion_accuracies'])

Evaluating with Post-Processing: 100%|██████████| 691/691 [03:08<00:00,  3.67it/s]

Post-processing overall accuracy: 0.669266262553153
Emotion-wise accuracies: {'anxiety': 0.6084320998823849, 'stress': 0.6616303266081607, 'confusion': 0.9215145209445399, 'hopeful': 0.46001085678096443, 'fear': 0.694743508549715}

if is_step_enabled('nlp_sentiment_analysis'):
    # Export the optimized model and tokenizer with post-processor config
    os.makedirs(SENTIMENT_MODEL_EXPORT_PATH_OPTIMIZED, exist_ok=True)
    shutil.copy(EMOTION_VARIATIONS_PATH, os.path.join(SENTIMENT_MODEL_EXPORT_PATH_OPTIMIZED, os.path.basename(EMOTION_VARIATIONS_PATH)))
    shutil.copy(NEGATION_PATTERNS_PATH, os.path.join(SENTIMENT_MODEL_EXPORT_PATH_OPTIMIZED, os.path.basename(NEGATION_PATTERNS_PATH)))
    SentimentAnalysisModel.export_best_model(
        best_model,
        sa_model.tokenizer,
        SENTIMENT_MODEL_EXPORT_PATH_OPTIMIZED
    )
    print(f"Optimized model and post-processor config exported to: {SENTIMENT_MODEL_EXPORT_PATH_OPTIMIZED}")

Best model and tokenizer exported to: d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\models\nlp\sentiment_analysis_optimized
Optimized model and post-processor config exported to: d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\models\nlp\sentiment_analysis_optimized

if is_step_enabled('nlp_sentiment_analysis'):
    # Run the optimized model test
    !pytest -s ../tests/test_sentiment_anlaysis.py -k test_sentiment_model_predictions_optimized  --maxfail=1 --disable-warnings -q

--- Running test_sentiment_model_predictions_optimized ---
✅ Test PASSED for: Patient is hopeful and shows no significant anxiety, stress, or fear related to health conditions.
Prediction: {'anxiety': 0, 'stress': 0, 'confusion': 0, 'hopeful': 1, 'fear': 0}
Expected: ['hopeful']

✅ Test PASSED for: Patient expresses fear and anxiety about high blood pressure and possible complications.
Prediction: {'anxiety': 1, 'stress': 0, 'confusion': 0, 'hopeful': 0, 'fear': 1}
Expected: ['fear', 'anxiety']

✅ Test PASSED for: Elderly patient expresses fear of declining health, confusion about medications, and stress related to mobility issues.
Prediction: {'anxiety': 1, 'stress': 1, 'confusion': 1, 'hopeful': 0, 'fear': 1}
Expected: ['fear', 'confusion', 'stress']

✅ Test PASSED for: Patient (minor) is anxious and fearful about medical procedures, sometimes confused by instructions, and stressed by separation from family.
Prediction: {'anxiety': 1, 'stress': 1, 'confusion': 1, 'hopeful': 0, 'fear': 1}
Expected: ['anxiety', 'fear', 'confusion', 'stress']

✅ Test PASSED for: Patient is calm and shows no signs of stress, anxiety, or fear during the appointment.
Prediction: {'anxiety': 1, 'stress': 1, 'confusion': 0, 'hopeful': 0, 'fear': 1}
Expected: []

✅ Test PASSED for: Patient is confused about the medication schedule and expresses frustration.
Prediction: {'anxiety': 0, 'stress': 0, 'confusion': 1, 'hopeful': 0, 'fear': 0}
Expected: ['confusion']

✅ Test PASSED for: Patient is hopeful about recovery but still experiences occasional stress.
Prediction: {'anxiety': 0, 'stress': 1, 'confusion': 0, 'hopeful': 1, 'fear': 0}
Expected: ['hopeful', 'stress']

✅ Test PASSED for: Patient is fearful of surgery and anxious about the outcome.
Prediction: {'anxiety': 1, 'stress': 0, 'confusion': 0, 'hopeful': 0, 'fear': 1}
Expected: ['fear', 'anxiety']

✅ Test PASSED for: Patient expresses both hope and anxiety regarding the new treatment plan.
Prediction: {'anxiety': 1, 'stress': 0, 'confusion': 0, 'hopeful': 1, 'fear': 0}
Expected: ['hopeful', 'anxiety']

✅ Test PASSED for: Patient is neither anxious nor fearful, but is confused by the instructions.
Prediction: {'anxiety': 1, 'stress': 0, 'confusion': 1, 'hopeful': 0, 'fear': 1}
Expected: ['confusion']


Test score (optimized): 10/10 passed.

.
1 passed, 1 deselected in 8.63s

if is_step_enabled('nlp_topic_modeling'):
    model = ClinicalTopicModel(config)
    conditions = ['diabetes', 'hypertension', 'alcohol']
    perplexities = []
    silhouette_scores = []
    all_topics = []

    for cond in conditions:
        df_cond = model.preprocess_notes(df, cond)
        if df_cond.empty:
            print(f"Skipping {cond}: No clinical concepts found after MedSpaCy extraction.")
            perplexities.append(None)
            silhouette_scores.append(None)
            all_topics.append([])
            continue
        model.train(df_cond['PatientNotes_clean'])
        perplexity, sil_score = model.evaluate(df_cond['PatientNotes_clean'])
        print(f"\n--- {cond.title()} ---")
        print(f"Model Perplexity: {perplexity:.2f}")
        if sil_score is not None:
            print(f"Silhouette Score: {sil_score:.2f}")
        topics = model.get_topics(n_top_words=10)
        for idx, topic_words in enumerate(topics):
            print(f"Topic {idx+1}: {' '.join(topic_words)}")
        perplexities.append(perplexity)
        silhouette_scores.append(sil_score if sil_score is not None else 0)
        all_topics.append(topics)

Fitting 2 folds for each of 5 candidates, totalling 10 fits

--- Diabetes ---
Model Perplexity: 17.41
Silhouette Score: 0.10
Topic 1: diabetes hba1c hypertension metformin type_2_diabetes insulin fasting_glucose atenolol hypoglycemia glipizide
Topic 2: hypertension diabetes blood_pressure diabetes_screening cardiovascular_risk_assessment alcoholism weight_management_counseling amlodipine medication_adherence_counseling cholesterol_screening
Fitting 2 folds for each of 5 candidates, totalling 10 fits

--- Hypertension ---
Model Perplexity: 14.13
Silhouette Score: 0.56
Topic 1: blood_pressure amlodipine sleep_hygiene_education losartan ast alt thiamine folic_acid hepatic_steatosis negated_hypertension
Topic 2: diabetes hba1c cardiovascular_risk_assessment metformin fasting_glucose atenolol type_2_diabetes negated_hypertension negated_amlodipine chronic_kidney_disease
Topic 3: alcoholism weight_management_counseling diabetes_screening medication_adherence_counseling patient_education alcohol_screening hydrochlorothiazide resistant_hypertension obstructive_sleep_apnea left_ventricular_hypertrophy
Fitting 2 folds for each of 5 candidates, totalling 10 fits

--- Alcohol ---
Model Perplexity: 20.55
Silhouette Score: 0.10
Topic 1: alcoholism alcohol_use_disorder alt ast motivational_interviewing heavy_drinking thiamine folic_acid disulfiram acamprosate
Topic 2: hypertension alcoholism diabetes_screening weight_management_counseling blood_pressure patient_education cholesterol_screening medication_adherence_counseling ecg atrial_fibrillation
Topic 3: hypertension alcohol_screening hydrochlorothiazide blood_pressure amlodipine losartan left_ventricular_hypertrophy echocardiogram chronic_kidney_disease proteinuria
Topic 4: diabetes hba1c hypertension hypoglycemia glipizide metformin fasting_glucose alcoholism type_2_diabetes insulin

if is_step_enabled('nlp_topic_modeling'):

    # Visualize clinical entities for a note after topic modeling
    if not df_cond.empty:
        # Visualize clinical entities for 10 notes after topic modeling
        for i in range(min(10, len(df_cond))):
            sample_note = df_cond['PatientNotes'].iloc[i]
            print(f'Visualizing clinical entities for a note {i+1}:')
            model.plot_medspacy_ents(sample_note)
        print('Visualizing clinical entities for a note:')
        model.plot_medspacy_ents(sample_note)
    else:
        print('No notes available for visualization.')

    # Plot word clouds for each condition after the loop
    for cond in conditions:
        plotter.plot_wordclouds(model.model, model.vectorizer, cond)

[autoreload of src.config failed: Traceback (most recent call last):
  File "d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\.venv\lib\site-packages\IPython\extensions\autoreload.py", line 276, in check
    superreload(m, reload, self.old_objects)
  File "d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\.venv\lib\site-packages\IPython\extensions\autoreload.py", line 475, in superreload
    module = reload(module)
  File "C:\Python310\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 619, in _exec
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "d:\Personal\AI-Admissions\Semester 3\AAI-510 - Machine learning Fundamentals and Applications\Final Team Project\aai510_3proj\src\config.py", line 6, in <module>
    from dotenv import load_dotenv
ModuleNotFoundError: No module named 'dotenv'
]

Visualizing clinical entities for a note 1:

Visualizing clinical entities for a note 2:

Visualizing clinical entities for a note 3:

Visualizing clinical entities for a note 4:

Visualizing clinical entities for a note 5:

Visualizing clinical entities for a note 6:

Visualizing clinical entities for a note 7:

Visualizing clinical entities for a note 8:

Visualizing clinical entities for a note 9:

if is_step_enabled('nlp_topic_modeling'):    
    # Plot Perplexity and Silhouette Score using plotter
    plotter.plot_bar(conditions, perplexities, title='LDA Model Perplexity by Condition (MedSpaCy)', ylabel='Perplexity')
    plotter.plot_bar(conditions, silhouette_scores, title='LDA Silhouette Score by Condition (MedSpaCy)', ylabel='Silhouette Score')

	PatientId	AppointmentID	Gender	ScheduledDay	AppointmentDay	Age	Neighbourhood	Hypertension	Diabetes	No-show	PatientNotes	PatientSentiment	NoShowReason
0	2.987250e+13	5642903	F	2016-04-29T18:38:08Z	2016-04-29T00:00:00Z	62	JARDIM DA PENHA	1	0	No	Patient with poorly controlled hypertension (s...	Patient is worried about long-term effects of ...	Positive experiences with clinic staff, such a...
1	5.589978e+14	5642503	M	2016-04-29T16:08:27Z	2016-04-29T00:00:00Z	56	JARDIM DA PENHA	0	0	No	Elderly patient. Discussed fall prevention str...	Confusion about insurance coverage and billing...	A clear understanding of their health status, ...
2	4.262962e+12	5642549	F	2016-04-29T16:19:04Z	2016-04-29T00:00:00Z	62	MATA DA PRAIA	0	0	No	Patient with hypertension is following a low-s...	Anxiety and confusion about diabetes care cont...	A clear understanding of their health status, ...
3	8.679512e+11	5642828	F	2016-04-29T17:29:31Z	2016-04-29T00:00:00Z	8	PONTAL DE CAMBURI	0	0	No	Child accompanied by parent/guardian. Reviewed...	Patient is worried about memory loss and manag...	The patient is committed to managing chronic c...
4	8.841186e+12	5642494	F	2016-04-29T16:07:23Z	2016-04-29T00:00:00Z	56	JARDIM DA PENHA	1	1	No	The patient is managing type 2 diabetes with M...	Fear of medication side effects and doubts abo...	The patient prioritizes following medical advi...

	PatientId	AppointmentID	Age	Scholarship	Hypertension	Diabetes	Alcoholism	Handcap	SMS_received
count	1.105270e+05	1.105270e+05	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000
mean	1.474963e+14	5.675305e+06	37.088874	0.098266	0.197246	0.071865	0.030400	0.022248	0.321026
std	2.560949e+14	7.129575e+04	23.110205	0.297675	0.397921	0.258265	0.171686	0.161543	0.466873
min	3.921784e+04	5.030230e+06	-1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	4.172614e+12	5.640286e+06	18.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
50%	3.173184e+13	5.680573e+06	37.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
75%	9.439172e+13	5.725524e+06	55.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000
max	9.999816e+14	5.790484e+06	115.000000	1.000000	1.000000	1.000000	1.000000	4.000000	1.000000

	Gender	ScheduledDay	AppointmentDay	Age	Neighbourhood	Hypertension	Diabetes	...	PatientNotes	PatientSentiment	NoShowReason	WaitDays	anxiety	stress	confusion	fear
0	0	2016-04-29 18:38:08+00:00	2016-04-29 00:00:00+00:00	62.0	JARDIM DA PENHA	1	0	...	Patient with poorly controlled hypertension (s...	Patient is worried about long-term effects of ...	Positive experiences with clinic staff, such a...	-1	0	1	1	1
1	1	2016-04-29 16:08:27+00:00	2016-04-29 00:00:00+00:00	56.0	JARDIM DA PENHA	0	0	...	Elderly patient. Discussed fall prevention str...	Confusion about insurance coverage and billing...	A clear understanding of their health status, ...	-1	0	1	1	0
2	0	2016-04-29 16:19:04+00:00	2016-04-29 00:00:00+00:00	62.0	MATA DA PRAIA	0	0	...	Patient with hypertension is following a low-s...	Anxiety and confusion about diabetes care cont...	A clear understanding of their health status, ...	-1	1	0	1	0
3	0	2016-04-29 17:29:31+00:00	2016-04-29 00:00:00+00:00	8.0	PONTAL DE CAMBURI	0	0	...	Child accompanied by parent/guardian. Reviewed...	Patient is worried about memory loss and manag...	The patient is committed to managing chronic c...	-1	0	0	0	0
4	0	2016-04-29 16:07:23+00:00	2016-04-29 00:00:00+00:00	56.0	JARDIM DA PENHA	1	1	...	The patient is managing type 2 diabetes with M...	Fear of medication side effects and doubts abo...	The patient prioritizes following medical advi...	-1	0	0	0	1

	Gender	Age	Scholarship	Hypertension	Diabetes	Alcoholism	Handcap	SMS_received	No-show	WaitDays	anxiety	stress	confusion	hopeful	fear
count	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000	110527.000000
mean	0.350023	37.088874	0.098266	0.197246	0.071865	0.030400	0.022248	0.321026	0.201933	9.183702	0.491979	0.603273	0.532594	0.106336	0.426412
std	0.476979	23.110205	0.297675	0.397921	0.258265	0.171686	0.161543	0.466873	0.401444	15.254996	0.499938	0.489221	0.498939	0.308269	0.494557
min	0.000000	-1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-7.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	0.000000	18.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	-1.000000	0.000000	0.000000	0.000000	0.000000	0.000000
50%	0.000000	37.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	3.000000	0.000000	1.000000	1.000000	0.000000	0.000000
75%	1.000000	55.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000	14.000000	1.000000	1.000000	1.000000	0.000000	1.000000
max	1.000000	115.000000	1.000000	1.000000	1.000000	1.000000	4.000000	1.000000	1.000000	178.000000	1.000000	1.000000	1.000000	1.000000	1.000000

	F1	Precision	Recall	Accuracy	ROC_AUC
Random Forest (Tuned)	0.447	0.306	0.825	0.587	0.729
Logistic Regression (Tuned)	0.413	0.321	0.580	0.668	0.670
Logistic Regression	0.411	0.321	0.572	0.669	0.669
Random Forest	0.361	0.311	0.430	0.693	0.635
XGBoost (Tuned)	0.056	0.451	0.030	0.797	0.728
XGBoost	0.003	0.857	0.001	0.798	0.728

	WaitDays	SMS_received	Age	Scholarship	Gender	Diabetes	Handcap	Actual	Predicted
23937	-1	0	7.0	0	1	0	0	1	0
99403	26	1	42.0	0	0	0	0	0	1
100162	13	1	3.0	1	1	0	0	0	1
63869	4	1	50.0	0	1	0	0	1	0
7668	-1	0	59.0	1	1	0	0	0	0
45098	-1	0	0.0	0	1	0	0	0	0
28787	13	0	37.0	0	0	0	0	1	1
25073	-1	0	71.0	0	0	1	0	0	0
28854	17	1	44.0	0	0	1	2	1	0
24559	3	0	60.0	0	0	1	0	0	1

Model	F1 Score	Precision	Recall	ROC AUC
Random Forest (Tuned)	0.447	0.306	0.825	0.729
Logistic Regression	0.411	0.321	0.572	0.669
XGBoost (Tuned)	0.056	0.451	0.030	0.728

	k	Silhouette Score	Davies-Bouldin Score	Calinski-Harabasz Score
0	2	0.149469	2.362978	17040.848357
1	3	0.159410	1.979498	17031.371113
2	4	0.156826	1.926502	15220.871132
3	5	0.148039	2.144950	12514.393745
4	6	0.143271	2.133657	11918.065318
5	7	0.151726	2.053565	12000.052416

	k	Silhouette Score	Davies-Bouldin Score	Calinski-Harabasz Score
0	2	0.123373	2.854364	11978.682032
1	3	0.136138	2.081934	15652.588788
2	4	0.108175	2.442411	10218.258226
3	5	0.131484	2.291962	10885.967972
4	6	0.146783	2.028766	11463.829010
5	7	0.129871	2.090993	11068.446895

Patient Appointments No-Show Prediction and Analysis¶

Index¶

Streamlit Application¶

1. Introduction: ⇧¶

2. Objective: ⇧¶

Problem Statement¶

3. Project Architecture: ⇧¶

3. Dataset:¶

4. Data Cleaning and Pre-processing: ⇧¶

5. Exploratory Data Analysis: ⇧¶

6. Model Selection: ⇧¶

Supervised Learning Models¶

Unsupervised Learning Models¶

Natural Language Processing (NLP) Models¶

6.1 Supervised Learning - Patient Show/No Show Prediction: ⇧¶

6.1.1 Logistic Regression: ⇧¶

6.1.2 Random Forest: ⇧¶

6.1.3 Gradient Boosting-XGBoost: ⇧¶

6.1.4 Feature Engineering: ⇧¶

6.1.5 Model Training: ⇧¶

6.1.6 Hyper Parameter Tuning: ⇧¶

Logistic Regression¶

Random Forest¶

XGBoost¶

6.1.7 Model Evaluation: ⇧¶

Why SMOTE is Needed for This Dataset¶

6.1.8 Model Analysis: ⇧¶

6.1.7 Conclusion: ⇧¶

6.2 Unsupervised Learning - Understanding Patient Profile: ⇧¶

6.2.1 Feature Engineering: ⇧¶

6.2.2 Dimensionality Reduction: ⇧¶

6.2.3 Optimal Clusters (k) - Elbow Method Analysis: ⇧¶

Elbow Curve¶

6.2.4 K-Means Clustering: ⇧¶

6.2.5 Gaussian Mixture Model (GMM): ⇧¶

6.2.6 Model Analysis: ⇧¶

6.2.7 Conclusion: ⇧¶

6.3 NLP- Patient Sentiment Analysis: ⇧¶

6.3.1 Feature Engineering: ⇧¶

6.3.2 Model Training: ⇧¶

6.3.3 Model Evaluation: ⇧¶

Interpretation of Emotion Classification Metrics¶

6.3.4 Hyper Parameter Tuning: ⇧¶

Model Performance and Analysis: Emotion Classification from Patient Sentiment Data¶

6.3.5 Model Testing: ⇧¶

6.3.6 Model Analysis & Optimization: ⇧¶

6.3.7 Conclusion: ⇧¶

6.4 NLP- Patient Notes Topic Modeling: ⇧¶

Models Selected for Topic Modeling¶

6.4.1 Feature Engineering: ⇧¶

6.4.2 Model Training: ⇧¶

Key Configurations and Parameters¶

6.4.3 Model Evaluation: ⇧¶

6.4.4 Model Analysis: ⇧¶

6.4.5 Conclusion: ⇧¶

7. Deployment: ⇧¶

Deployment using Streamit app¶

Streamlit Application¶

Deployment on Streamlit Community Cloud¶

Devcontainer & Docker Containers¶

Other Deployment and MLOps methods¶

Extending Models as Flask API and Cloud Deployment¶

8. Conclusion: ⇧¶

9. Recommendations: ⇧¶

10. Next Steps: ⇧¶

11. References: ⇧¶