Context-Aware Nutritional Assessment¶

Predicting Food Processing Tiers through Machine Learning¶

Notebook: 04 - Hyperparameter Tuning and Final Evaluation¶

AAI-590 Capstone Project - University of San Diego¶

Team Members:¶

  • Jamshed Nabizada
  • Swapnil Patil

Objective¶

This notebook performs hyperparameter tuning on the selected XGBoost model from Notebook 03, evaluates the optimized model on the held-out test set, and applies SHAP-based explainability to understand which features drive NOVA food processing classification predictions.

Workflow:

  1. Load prepared data splits and the baseline XGBoost configuration from previous notebooks
  2. Perform Bayesian hyperparameter optimization using Optuna with early stopping to improve Macro F1
  3. Train the final tuned model on the full training data
  4. Evaluate on the held-out test set (first and only use of test data)
  5. Compare baseline vs. tuned performance
  6. Apply SHAP explainability to interpret model decisions at both global and class levels

Environment Setup and Configuration¶

In [1]:
%pip install -q -r requirements.txt lightgbm optuna shap
Note: you may need to restart the kernel to use updated packages.
In [2]:
# suppress warnings for cleaner notebook output
import warnings
warnings.filterwarnings("ignore")

# force-reload src.modeling modules so code changes are always picked up
%load_ext autoreload
%autoreload 2

import gc
import json
import shutil
import time
from pathlib import Path
from typing import Any, Dict, List, Optional

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
from IPython.display import display
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_sample_weight
from xgboost import XGBClassifier

from src.modeling import (
    N_CLASSES,
    NOVA_LABELS,
    PRIMARY_METRIC,
    RANDOM_STATE,
    FEATURES_ARTIFACTS_DIRECTORY,
    MODEL_TRAINING_ARTIFACTS_DIRECTORY,
    MODEL_TUNING_ARTIFACTS_DIRECTORY,
    API_MODEL_RESULTS_DIRECTORY,
    HyperparameterTuner,
    MetricsEvaluator,
    ModelPlotter,
    ModelResult,
    ModelRunner,
)

np.random.seed(RANDOM_STATE)

Load Prepared Data¶

Load the train/validation/test splits, feature names, class weights, and scaler produced by the Feature Engineering notebook (Notebook 02), along with the model selection metadata from the Model Training notebook (Notebook 03).

This notebook uses 13 features total:

  • 10 original nutritional features selected from EDA
  • 3 engineered ratio features created in Feature Engineering
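The engineered ratio features follow directly from the base nutrient columns. As a hedged sketch (the exact formulas and any division-by-zero smoothing constant live in Notebook 02; the `EPS` value here is an assumption), the three ratios could be computed like this:

```python
import pandas as pd

EPS = 1e-6  # assumed smoothing constant to avoid division by zero

def add_ratio_features(df: pd.DataFrame) -> pd.DataFrame:
    """Append the three engineered ratio features to a nutrient dataframe."""
    out = df.copy()
    out["sugar_fiber_ratio"] = out["sugars_100g"] / (out["fiber_100g"] + EPS)
    out["fat_protein_ratio"] = out["fat_100g"] / (out["proteins_100g"] + EPS)
    out["additives_per_energy"] = out["additives_n"] / (out["energy_100g"] + EPS)
    return out

# toy example with values from one test sample shown later in this notebook
df = pd.DataFrame({
    "sugars_100g": [54.0], "fiber_100g": [3.5],
    "fat_100g": [18.0], "proteins_100g": [7.0],
    "additives_n": [6], "energy_100g": [1867.0],
})
print(add_ratio_features(df).round(2).to_dict("records"))
```

With these inputs the ratios come out to roughly 15.43, 2.57, and 0.00, consistent with the sample-prediction table below.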
In [3]:
# load saved splits and metadata from Feature Engineering notebook (Notebook 02)
splits = joblib.load(FEATURES_ARTIFACTS_DIRECTORY / "data_splits.joblib")

# unpack train/validation/test splits
X_train = splits["X_train"]
X_val = splits["X_val"]
X_test = splits["X_test"]
y_train = splits["y_train"]
y_val = splits["y_val"]
y_test = splits["y_test"]

# feature names and class weights from feature engineering
feature_names = splits["feature_names"]
class_weights_dict = splits["class_weights"]

# load feature engineering metadata to validate consistency
with open(FEATURES_ARTIFACTS_DIRECTORY / "feature_engineering_metadata.json") as f:
    fe_meta = json.load(f)

# load model selection metadata from Notebook 03
with open(MODEL_TRAINING_ARTIFACTS_DIRECTORY / "selected_model_meta.json") as f:
    selection_meta = json.load(f)

# validate feature consistency (10 original + 3 engineered = 13 total)
expected_num_features = len(fe_meta["original_features"]) + len(fe_meta["engineered_features"])
assert len(feature_names) == expected_num_features, \
    f"Feature count mismatch: expected {expected_num_features}, got {len(feature_names)}"

print("Feature Engineering Metadata Validation")
print(f"Original features: {len(fe_meta['original_features'])}")
print(f"Engineered features: {len(fe_meta['engineered_features'])}")
print(f"Total features: {len(feature_names)}")
print(f"\nRemoved from EDA: {', '.join(fe_meta['features_removed_in_eda'])}")

print(f"\nSelected model: {selection_meta['selected_model']}")
print(f"Baseline {PRIMARY_METRIC}: {selection_meta['primary_metric_value']:.4f}")

print(f"\nFeatures ({len(feature_names)}):")
for feat in feature_names:
    print(f"  - {feat}")

print(f"\nData Splits:")
print(f"  Train: {X_train.shape}")
print(f"  Val:   {X_val.shape}")
print(f"  Test:  {X_test.shape}")

# initialize and fit the scaler for later export to backend
scaler = StandardScaler()
scaler.fit(X_train)
Feature Engineering Metadata Validation
Original features: 10
Engineered features: 3
Total features: 13

Removed from EDA: sodium_100g, trans_fat_100g, monounsaturated_fat_100g, polyunsaturated_fat_100g, starch_100g

Selected model: XGBoost
Baseline Macro F1: 0.8601

Features (13):
  - energy_100g
  - fat_100g
  - carbohydrates_100g
  - sugars_100g
  - fiber_100g
  - proteins_100g
  - salt_100g
  - saturated_fat_100g
  - additives_n
  - added_sugars_100g
  - sugar_fiber_ratio
  - fat_protein_ratio
  - additives_per_energy

Data Splits:
  Train: (184159, 13)
  Val:   (61386, 13)
  Test:  (61387, 13)
Out[3]:
StandardScaler()
In [4]:
# per-sample weights for XGBoost training (inverse frequency-based)
train_sample_weights = compute_sample_weight("balanced", y_train)

# verify class distributions across splits
for name, y in [("Train", y_train), ("Val", y_val), ("Test", y_test)]:
    counts = y.value_counts().sort_index()
    pcts = (counts / len(y) * 100).round(1)
    dist = ", ".join(f"NOVA {i+1}: {c:,} ({pcts[i]}%)" for i, c in counts.items())
    print(f"{name:5s} ({len(y):,}) -- {dist}")
Train (184,159) -- NOVA 1: 22,221 (12.1%), NOVA 2: 8,144 (4.4%), NOVA 3: 37,478 (20.4%), NOVA 4: 116,316 (63.2%)
Val   (61,386) -- NOVA 1: 7,407 (12.1%), NOVA 2: 2,715 (4.4%), NOVA 3: 12,492 (20.3%), NOVA 4: 38,772 (63.2%)
Test  (61,387) -- NOVA 1: 7,408 (12.1%), NOVA 2: 2,714 (4.4%), NOVA 3: 12,493 (20.4%), NOVA 4: 38,772 (63.2%)
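Given this class imbalance (NOVA 4 is ~63% of samples), `compute_sample_weight("balanced")` assigns each sample the weight `n_samples / (n_classes * count_of_its_class)`, so every class contributes equal total weight to the loss. A minimal numpy check using the training counts printed above:

```python
import numpy as np

# training-split class counts from the output above
counts = np.array([22221, 8144, 37478, 116316])
n_samples, n_classes = counts.sum(), len(counts)

# sklearn's "balanced" heuristic: n_samples / (n_classes * class_count)
class_weight = n_samples / (n_classes * counts)
print(dict(zip([f"NOVA {i+1}" for i in range(4)], class_weight.round(3))))

# each class now carries the same total weight: n_samples / n_classes
print(class_weight * counts)
```

Rare NOVA 2 samples get a weight above 5, while abundant NOVA 4 samples get a weight below 0.5, which is why the weighted XGBoost fit does not simply predict the majority class.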

Baseline XGBoost Performance¶

First, retrain the XGBoost model with the same configuration used in Notebook 03 to establish the baseline performance that the hyperparameter tuning should improve upon.

In [5]:
# instantiate shared evaluation objects
evaluator = MetricsEvaluator(labels=NOVA_LABELS, n_classes=N_CLASSES)
runner = ModelRunner(evaluator=evaluator)
plotter = ModelPlotter()

# baseline XGBoost with the same parameters from Notebook 03
xgb_baseline = XGBClassifier(
    objective="multi:softprob",
    num_class=N_CLASSES,
    eval_metric="mlogloss",
    n_estimators=500,
    max_depth=8,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_alpha=0.1,
    reg_lambda=1.0,
    tree_method="hist",
    random_state=RANDOM_STATE,
    verbosity=0,
)

baseline_result = runner.run(
    name="XGBoost (Baseline)",
    model=xgb_baseline,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    fit_kwargs={
        "sample_weight": train_sample_weights,
        "eval_set": [(X_val, y_val)],
        "verbose": False,
    },
)

print(f"Baseline XGBoost -- Macro F1: {baseline_result.metrics['Macro F1']:.4f}  "
      f"Balanced Acc: {baseline_result.metrics['Balanced Accuracy']:.4f}  "
      f"Train: {baseline_result.metrics['Train Time (s)']:.1f}s")
display(baseline_result.per_class)
Baseline XGBoost -- Macro F1: 0.8601  Balanced Acc: 0.8897  Train: 10.8s
Class Precision Recall F1 Support
0 NOVA 1 0.800869 0.920346 0.856461 7407.0
1 NOVA 2 0.887114 0.940700 0.913121 2715.0
2 NOVA 3 0.702131 0.828370 0.760044 12492.0
3 NOVA 4 0.956094 0.869416 0.910697 38772.0

Hyperparameter Tuning with Optuna¶

Bayesian optimization via Optuna is used to search for the best XGBoost hyperparameters. The objective maximizes Macro F1 on the validation set, which is aligned with the primary evaluation metric used throughout the project. Early stopping halts boosting when validation loss stops improving, preventing overfitting and reducing wasted computation.

The search space covers structural parameters (tree depth, max leaves, number of estimators), regularization parameters (L1/L2, min child weight, gamma, max_delta_step), and stochastic parameters (subsampling at tree, level, and node granularity) that control the bias-variance tradeoff in gradient-boosted trees. The expanded search space and increased trial count give Optuna more room to find stronger configurations.

In [6]:
# initialize the tuner with early stopping (stops boosting when val loss plateaus)
tuner = HyperparameterTuner(
    n_classes=N_CLASSES,
    random_state=RANDOM_STATE,
    early_stopping_rounds=50,
)
In [7]:
# run Bayesian hyperparameter search (30 trials)
tuning_result = tuner.tune(
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    sample_weight=train_sample_weights,
    n_trials=30,
    baseline_score=baseline_result.metrics["Macro F1"],
)

print(f"\nBest trial: #{tuning_result.study.best_trial.number}")
print(f"Best Macro F1: {tuning_result.best_score:.4f}")
print(f"Baseline Macro F1: {tuning_result.baseline_score:.4f}")
print(f"Improvement: {tuning_result.improvement:+.4f}")
print(f"\nBest hyperparameters:")
for k, v in tuning_result.best_params.items():
    print(f"  {k}: {v}")
Best trial: 28. Best value: 0.86453: 100%|██████████| 30/30 [15:35<00:00, 31.17s/it]
Best trial: #28
Best Macro F1: 0.8645
Baseline Macro F1: 0.8601
Improvement: +0.0044

Best hyperparameters:
  n_estimators: 1450
  max_depth: 10
  learning_rate: 0.0672265139552494
  max_leaves: 181
  min_child_weight: 3
  reg_alpha: 0.0026268537269584433
  reg_lambda: 0.0012668861795320405
  gamma: 0.3315749044878783
  max_delta_step: 1
  subsample: 0.6674855217179587
  colsample_bytree: 0.9564715588013275
  colsample_bylevel: 0.8468983274631778
  colsample_bynode: 0.6249540787327926

In [8]:
# optimization history and hyperparameter importance
plotter.plot_optimization_history(tuning_result)
[Figure: Optuna optimization history and hyperparameter importance]

Train Final Tuned Model¶

Train the XGBoost model with the best hyperparameters found by Optuna. This model will be evaluated on the validation set first to confirm improvement, and then on the held-out test set for the final unbiased evaluation.

In [9]:
# build the tuned model from the best parameters (includes early stopping)
best_params = tuning_result.best_params.copy()

# cap n_estimators to keep training time and model size manageable
best_params["n_estimators"] = min(best_params["n_estimators"], 1000)

# construct tuned model
xgb_tuned = tuner.build_tuned_model(best_params)

# evaluate on validation set using the shared pipeline
tuned_result = runner.run(
    name="XGBoost (Tuned)",
    model=xgb_tuned,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    fit_kwargs={
        "sample_weight": train_sample_weights,
        "eval_set": [(X_val, y_val)],
        "verbose": False,
    },
)

print(
    f"Tuned XGBoost -- Macro F1: {tuned_result.metrics['Macro F1']:.4f}  "
    f"Balanced Acc: {tuned_result.metrics['Balanced Accuracy']:.4f}  "
    f"Train: {tuned_result.metrics['Train Time (s)']:.1f}s"
)

display(tuned_result.per_class)
Tuned XGBoost -- Macro F1: 0.8645  Balanced Acc: 0.8896  Train: 38.6s
Class Precision Recall F1 Support
0 NOVA 1 0.811057 0.915080 0.859934 7407.0
1 NOVA 2 0.892533 0.942173 0.916682 2715.0
2 NOVA 3 0.719134 0.819244 0.765932 12492.0
3 NOVA 4 0.951659 0.881951 0.915480 38772.0
In [10]:
# side-by-side comparison of baseline vs. tuned on validation set
comparison_rows = []
for res in [baseline_result, tuned_result]:
    row = {"Model": res.name}
    row.update(res.metrics)
    comparison_rows.append(row)

comparison_df = pd.DataFrame(comparison_rows)

print("Baseline vs. Tuned (Validation Set)")
display(comparison_df.style.format({
    c: "{:.4f}" for c in comparison_df.columns if c != "Model"
}).highlight_max(
    subset=[PRIMARY_METRIC, "Balanced Accuracy", "Weighted F1"],
    color="#f7376d"
))
Baseline vs. Tuned (Validation Set)
  Model Accuracy Balanced Accuracy Macro Precision Macro Recall Macro F1 Weighted Precision Weighted Recall Weighted F1 Log Loss ROC-AUC (OVR) Train Time (s) Inference Time (s)
0 XGBoost (Baseline) 0.8704 0.8897 0.8366 0.8897 0.8601 0.8826 0.8704 0.8736 0.3310 0.9741 10.7500 0.2310
1 XGBoost (Tuned) 0.8759 0.8896 0.8436 0.8896 0.8645 0.8848 0.8759 0.8784 0.3258 0.9747 38.6300 0.7390
In [11]:
# per-class F1 comparison: baseline vs. tuned
plotter.plot_baseline_vs_tuned(baseline_result, tuned_result, NOVA_LABELS)
[Figure: per-class F1 comparison, baseline vs. tuned]

Final Test Set Evaluation¶

The test set is used here for the first and only time to provide an unbiased estimate of the tuned model's generalization performance. No model selection decisions are made based on these results.

In [12]:
# evaluate the tuned model on the held-out test set
t0 = time.perf_counter()
y_test_pred = xgb_tuned.predict(X_test)
test_inference_time = time.perf_counter() - t0

y_test_proba = xgb_tuned.predict_proba(X_test)

# compute test metrics using the shared evaluator
test_metrics, test_per_class, test_cm, test_report = evaluator.evaluate(
    y_test, y_test_pred, y_test_proba
)
test_metrics["Inference Time (s)"] = round(test_inference_time, 4)

print("Final Test Set Results (XGBoost Tuned)")
print(f"  Macro F1:          {test_metrics['Macro F1']:.4f}")
print(f"  Balanced Accuracy: {test_metrics['Balanced Accuracy']:.4f}")
print(f"  Weighted F1:       {test_metrics['Weighted F1']:.4f}")
print(f"  Accuracy:          {test_metrics['Accuracy']:.4f}")
if "Log Loss" in test_metrics:
    print(f"  Log Loss:          {test_metrics['Log Loss']:.4f}")
if "ROC-AUC (OVR)" in test_metrics:
    print(f"  ROC-AUC (OVR):     {test_metrics['ROC-AUC (OVR)']:.4f}")

print("\nPer-Class Metrics (Test Set):")
display(test_per_class)
Final Test Set Results (XGBoost Tuned)
  Macro F1:          0.8683
  Balanced Accuracy: 0.8943
  Weighted F1:       0.8818
  Accuracy:          0.8792
  Log Loss:          0.3165
  ROC-AUC (OVR):     0.9760

Per-Class Metrics (Test Set):
Class Precision Recall F1 Support
0 NOVA 1 0.815090 0.918737 0.863815 7408.0
1 NOVA 2 0.892944 0.946573 0.918977 2714.0
2 NOVA 3 0.724314 0.828384 0.772861 12493.0
3 NOVA 4 0.954756 0.883344 0.917663 38772.0
In [13]:
# confusion matrix for the final test set evaluation
plotter.plot_confusion_matrix(test_cm, NOVA_LABELS, "XGBoost (Tuned) -- Confusion Matrix (Test Set)")
[Figure: confusion matrix, XGBoost (Tuned), test set]
In [14]:
# compare validation vs. test performance to check for overfitting
val_vs_test = pd.DataFrame([
    {"Split": "Validation", **{k: v for k, v in tuned_result.metrics.items() if k != "Train Time (s)"}},
    {"Split": "Test", **{k: v for k, v in test_metrics.items()}},
])

print("Validation vs. Test Performance (Tuned Model)")
display(val_vs_test.style.format({
    c: "{:.4f}" for c in val_vs_test.columns if c != "Split"
}))
Validation vs. Test Performance (Tuned Model)
  Split Accuracy Balanced Accuracy Macro Precision Macro Recall Macro F1 Weighted Precision Weighted Recall Weighted F1 Log Loss ROC-AUC (OVR) Inference Time (s)
0 Validation 0.8759 0.8896 0.8436 0.8896 0.8645 0.8848 0.8759 0.8784 0.3258 0.9747 0.7390
1 Test 0.8792 0.8943 0.8468 0.8943 0.8683 0.8883 0.8792 0.8818 0.3165 0.9760 0.6366

Sample Predictions¶

Random test samples showing the model's actual predictions alongside true NOVA labels, predicted probabilities, and the input feature values. This provides a concrete view of how the model classifies individual food products.

In [15]:
# pick random test samples (reproducible) and show actual vs. predicted NOVA class
rng = np.random.RandomState(RANDOM_STATE)
sample_indices = rng.choice(len(X_test), size=10, replace=False)

sample_df = X_test.iloc[sample_indices].copy()
sample_df.insert(0, "True NOVA", [NOVA_LABELS[y] for y in y_test.iloc[sample_indices]])
sample_df.insert(1, "Predicted NOVA", [NOVA_LABELS[y] for y in y_test_pred[sample_indices]])
sample_df.insert(2, "Correct", sample_df["True NOVA"] == sample_df["Predicted NOVA"])

# add predicted probabilities for each class
proba_cols = {label: y_test_proba[sample_indices, i] for i, label in enumerate(NOVA_LABELS)}
for label, probs in proba_cols.items():
    sample_df[f"P({label})"] = probs

correct_count = sample_df["Correct"].sum()
print(f"Sample Predictions ({correct_count}/10 correct)")
display(
    sample_df.style
    .format({f"P({l})": "{:.3f}" for l in NOVA_LABELS})
    .format({c: "{:.2f}" for c in feature_names})
    .map(lambda v: "color: green" if v is True else "color: red", subset=["Correct"])
)
Sample Predictions (9/10 correct)
  True NOVA Predicted NOVA Correct energy_100g fat_100g carbohydrates_100g sugars_100g fiber_100g proteins_100g salt_100g saturated_fat_100g additives_n added_sugars_100g sugar_fiber_ratio fat_protein_ratio additives_per_energy P(NOVA 1) P(NOVA 2) P(NOVA 3) P(NOVA 4)
20110 NOVA 4 NOVA 4 True 1137.90 3.13 51.56 3.12 1.60 7.81 1.67 0.78 6.00 1.56 1.95 0.40 0.01 0.000001 0.000000 0.001882 0.998117
4315 NOVA 4 NOVA 3 False 1059.14 15.05 15.05 0.00 1.08 13.98 1.34 3.76 0.00 0.00 0.00 1.08 0.00 0.001262 0.000060 0.588468 0.410210
241119 NOVA 4 NOVA 4 True 182.00 1.80 5.60 0.00 1.00 0.70 0.11 0.30 3.00 0.00 0.00 2.57 0.02 0.000011 0.000001 0.001037 0.998951
239397 NOVA 4 NOVA 4 True 1428.60 0.20 80.00 0.00 2.20 3.60 0.05 0.00 8.00 0.00 0.00 0.06 0.01 0.000002 0.000000 0.000053 0.999945
38842 NOVA 4 NOVA 4 True 167.33 0.00 0.67 0.00 0.00 8.00 0.11 0.00 2.00 0.00 0.00 0.00 0.01 0.003367 0.000047 0.024098 0.972489
71014 NOVA 4 NOVA 4 True 1867.00 18.00 62.00 54.00 3.50 7.00 0.02 1.60 0.00 33.29 15.43 2.57 0.00 0.000029 0.000013 0.054047 0.945910
111513 NOVA 3 NOVA 3 True 1538.40 30.00 0.20 0.20 0.00 25.00 1.50 20.00 0.00 0.00 0.20 1.20 0.00 0.000113 0.000004 0.996067 0.003815
51684 NOVA 4 NOVA 4 True 736.90 15.00 1.30 1.10 0.00 9.40 0.90 1.30 1.00 0.81 1.10 1.60 0.00 0.000006 0.000043 0.039478 0.960473
149174 NOVA 4 NOVA 4 True 1173.00 23.00 8.90 0.70 1.80 9.70 2.27 10.00 4.00 0.00 0.39 2.37 0.00 0.000002 0.000000 0.000326 0.999671
295554 NOVA 4 NOVA 4 True 520.50 9.54 7.89 2.78 0.05 1.89 1.46 6.36 6.00 0.00 61.24 5.04 0.01 0.000004 0.000000 0.000042 0.999954

Interpretation¶

Test performance (Macro F1 0.8683) is slightly higher than validation performance (0.8645), and the remaining metrics are similarly close across the two splits, indicating that the model did not overfit to the validation set during hyperparameter tuning. A significant drop from validation to test would instead have suggested that the tuning process overfit to the validation data.


SHAP Explainability¶

SHAP (SHapley Additive exPlanations) values are used to interpret the tuned XGBoost model by quantifying each feature's contribution to individual and aggregate predictions. This provides transparency into which nutritional and additive features are most important for distinguishing between NOVA food processing groups.

SHAP is applied to the validation set to explain the model's behavior on data it was not trained on, providing a more representative view of feature attribution than training-set explanations.
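SHAP's defining "local accuracy" property is that a sample's feature attributions plus the base value reconstruct the model's raw output for that sample. For a linear model the attributions even have a closed form, phi_j = w_j * (x_j - E[x_j]), which makes the property easy to verify with plain numpy (this toy model is illustrative, independent of the XGBoost model tuned above):

```python
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
w = np.array([1.5, -2.0, 0.5, 0.0])
f = X @ w + 3.0                      # linear "model" output

base_value = f.mean()                # expected model output over the data
phi = (X - X.mean(axis=0)) * w       # per-sample SHAP values of a linear model

# local accuracy: base value + attributions reconstruct each prediction
recon = base_value + phi.sum(axis=1)
print(np.max(np.abs(recon - f)))     # ~0 up to floating point
```

TreeExplainer computes the analogous exact attributions for tree ensembles; the same additivity holds per class in the multi-class XGBoost margins below.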

In [16]:
# compute SHAP values using TreeExplainer — lightweight shap_values() call only
explainer = shap.TreeExplainer(xgb_tuned)
X_val_np = X_val.values.astype(np.float64)

raw_shap = explainer.shap_values(X_val_np)

# diagnostic: inspect exactly what SHAP returned
print(f"Return type: {type(raw_shap)}")
if isinstance(raw_shap, list):
    print(f"List of {len(raw_shap)} arrays, shapes: {[np.array(a).shape for a in raw_shap]}")
    shap_values = [np.array(a) for a in raw_shap]
elif isinstance(raw_shap, np.ndarray):
    print(f"ndarray shape: {raw_shap.shape}")
    if raw_shap.ndim == 3:
        # (samples, features, classes) layout
        shap_values = [raw_shap[:, :, c].copy() for c in range(raw_shap.shape[2])]
    else:
        shap_values = [raw_shap.copy()]
else:
    raise TypeError(f"Unexpected SHAP return: {type(raw_shap)}")

n_features = X_val_np.shape[1]
n_samples = X_val_np.shape[0]

# trim offset column if SHAP appended one
for c in range(len(shap_values)):
    if shap_values[c].shape[1] > n_features:
        shap_values[c] = shap_values[c][:, :n_features]

# expected values for waterfall plots (scalar per class)
shap_expected = explainer.expected_value

# strict shape verification
for c, sv in enumerate(shap_values):
    assert sv.shape == (n_samples, n_features), \
        f"Class {c}: got {sv.shape}, expected ({n_samples}, {n_features})"
    print(f"  Class {c}: {sv.shape} OK")

print(f"\n{len(shap_values)} classes, {n_samples} samples, {n_features} features")
print(f"Expected values: {shap_expected}")
Return type: <class 'numpy.ndarray'>
ndarray shape: (61386, 13, 4)
  Class 0: (61386, 13) OK
  Class 1: (61386, 13) OK
  Class 2: (61386, 13) OK
  Class 3: (61386, 13) OK

4 classes, 61386 samples, 13 features
Expected values: [np.float32(-0.11093068), np.float32(0.36178556), np.float32(0.08899606), np.float32(-0.14272425)]

Global Feature Importance¶

The summary bar plot shows the mean absolute SHAP value for each feature across all classes and all validation samples. Features with larger SHAP values have a greater influence on the model's predictions.
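The aggregation behind the bar plot is a simple reduction over the (samples, features, classes) array that the diagnostic cell above showed SHAP returning. A sketch on random data with the same feature/class counts (the averaging over classes here mirrors the table built later in this notebook; some shap versions sum rather than average per class in the bar plot):

```python
import numpy as np

rng = np.random.default_rng(1)
sv = rng.normal(size=(100, 13, 4))   # (samples, features, classes)

# per-class importance: mean |SHAP| over samples -> (classes, features)
per_class = np.abs(sv).mean(axis=0).T

# overall importance: average per-class scores -> (features,)
overall = per_class.mean(axis=0)
ranking = np.argsort(overall)[::-1]
print(ranking[:3])                   # indices of the top-3 features
```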

In [17]:
# global feature importance: mean |SHAP| across all classes
shap.summary_plot(
    shap_values,
    X_val_np,
    feature_names=feature_names,
    plot_type="bar",
    class_names=NOVA_LABELS,
    show=False,
    max_display=20,
)
plt.title("Global Feature Importance (Mean |SHAP|)")
plt.tight_layout()
plt.show()
[Figure: global feature importance, mean |SHAP| bar plot]

Per-Class SHAP Analysis¶

SHAP beeswarm plots for each NOVA class show how individual feature values affect the model's prediction for that specific class. Each dot represents one sample, with position on the x-axis indicating the SHAP value (positive = pushes toward this class) and color indicating the feature value (red = high, blue = low).

In [18]:
# per-class SHAP beeswarm plots
for i, label in enumerate(NOVA_LABELS):
    sv_i = np.ascontiguousarray(shap_values[i], dtype=np.float64)
    data_i = np.ascontiguousarray(X_val_np, dtype=np.float64)

    assert sv_i.shape == data_i.shape, \
        f"SHAPE MISMATCH for {label}: shap {sv_i.shape} != data {data_i.shape}"
    print(f"\n--- {label} --- shap: {sv_i.shape}, data: {data_i.shape}")

    shap.summary_plot(
        sv_i,
        data_i,
        feature_names=list(feature_names),
        show=False,
        max_display=15,
    )
    plt.title(f"SHAP Values -- {label}")
    plt.tight_layout()
    plt.show()
--- NOVA 1 --- shap: (61386, 13), data: (61386, 13)
[Figure: SHAP beeswarm, NOVA 1]
--- NOVA 2 --- shap: (61386, 13), data: (61386, 13)
[Figure: SHAP beeswarm, NOVA 2]
--- NOVA 3 --- shap: (61386, 13), data: (61386, 13)
[Figure: SHAP beeswarm, NOVA 3]
--- NOVA 4 --- shap: (61386, 13), data: (61386, 13)
[Figure: SHAP beeswarm, NOVA 4]

SHAP Dependence Plots¶

Dependence plots reveal the relationship between a specific feature's value and its SHAP contribution for a given class. These plots help identify nonlinear effects, thresholds, and feature interactions that the model has learned.

In [19]:
# identify top 3 most important features globally for dependence plots
mean_abs_shap = np.mean([np.abs(sv) for sv in shap_values], axis=0).mean(axis=0)
top_feature_indices = np.argsort(mean_abs_shap)[::-1][:3]
top_features = [feature_names[i] for i in top_feature_indices]

print(f"Top 3 features for dependence analysis: {top_features}")

# dependence plots for NOVA 4 (majority class / ultra-processed)
nova4_idx = 3  # NOVA 4 = class index 3
for feat in top_features:
    shap.dependence_plot(
        feat,
        shap_values[nova4_idx],
        X_val_np,
        feature_names=feature_names,
        show=False,
    )
    plt.title(f"SHAP Dependence: {feat} (NOVA 4)")
    plt.tight_layout()
    plt.show()
Top 3 features for dependence analysis: ['additives_n', 'added_sugars_100g', 'salt_100g']
[Figures: SHAP dependence plots for additives_n, added_sugars_100g, and salt_100g (NOVA 4)]

SHAP Waterfall: Individual Prediction Explanation¶

A waterfall plot decomposes a single prediction to show exactly how each feature contributes to the model's output for that specific sample. This provides transparency at the individual prediction level, which is important for understanding model behavior on specific food products.

In [20]:
# select one representative sample from each NOVA class in the validation set
y_val_pred = xgb_tuned.predict(X_val_np)

for cls_idx, label in enumerate(NOVA_LABELS):
    # find a correctly predicted sample for this class
    mask = (y_val.values == cls_idx) & (y_val_pred == cls_idx)
    if mask.sum() == 0:
        print(f"No correctly predicted samples for {label}, skipping.")
        continue

    sample_idx = np.where(mask)[0][0]
    bv = shap_expected[cls_idx] if hasattr(shap_expected, '__len__') else shap_expected

    print(f"\n--- {label} (sample #{sample_idx}) ---")
    explanation = shap.Explanation(
        values=shap_values[cls_idx][sample_idx],
        base_values=bv,
        data=X_val_np[sample_idx],
        feature_names=list(feature_names),
    )
    shap.waterfall_plot(explanation, show=False, max_display=12)
    plt.title(f"Prediction Explanation -- {label}")
    plt.tight_layout()
    plt.show()
--- NOVA 1 (sample #5) ---
[Figure: SHAP waterfall, NOVA 1]
--- NOVA 2 (sample #9) ---
[Figure: SHAP waterfall, NOVA 2]
--- NOVA 3 (sample #8) ---
[Figure: SHAP waterfall, NOVA 3]
--- NOVA 4 (sample #0) ---
[Figure: SHAP waterfall, NOVA 4]

SHAP Feature Importance Table¶

Tabular summary of mean absolute SHAP values per feature and per class, providing a precise numeric reference for the visual plots above.

In [21]:
# build a structured SHAP importance table
shap_importance_rows = []
for i, label in enumerate(NOVA_LABELS):
    mean_abs = np.abs(shap_values[i]).mean(axis=0)
    for j, feat in enumerate(feature_names):
        shap_importance_rows.append({
            "Feature": feat,
            "Class": label,
            "Mean |SHAP|": mean_abs[j],
        })

shap_df = pd.DataFrame(shap_importance_rows)
shap_pivot = shap_df.pivot_table(index="Feature", columns="Class", values="Mean |SHAP|")
shap_pivot["Overall"] = shap_pivot.mean(axis=1)
shap_pivot = shap_pivot.sort_values("Overall", ascending=False)

print("SHAP Feature Importance by Class (Mean |SHAP| on Validation Set)")
display(shap_pivot.style.format("{:.4f}").background_gradient(cmap="YlOrRd", axis=None))
SHAP Feature Importance by Class (Mean |SHAP| on Validation Set)
Class NOVA 1 NOVA 2 NOVA 3 NOVA 4 Overall
Feature          
additives_n 1.0858 1.0988 0.5459 0.9484 0.9197
added_sugars_100g 1.7796 0.8213 0.1939 0.5970 0.8479
salt_100g 0.8689 0.7676 0.4092 0.2592 0.5762
additives_per_energy 0.6712 0.3718 0.4178 0.8397 0.5751
proteins_100g 0.3194 1.0949 0.1806 0.1671 0.4405
energy_100g 0.4311 0.6988 0.1475 0.1561 0.3584
fiber_100g 0.2310 0.8500 0.1370 0.1226 0.3351
carbohydrates_100g 0.3359 0.6019 0.1486 0.1964 0.3207
fat_100g 0.3741 0.6031 0.1120 0.1416 0.3077
fat_protein_ratio 0.3261 0.5399 0.1211 0.1136 0.2752
sugars_100g 0.2306 0.5393 0.1341 0.1328 0.2592
saturated_fat_100g 0.2918 0.3015 0.1358 0.1882 0.2293
sugar_fiber_ratio 0.2059 0.4473 0.1087 0.1099 0.2180

Export Artifacts¶

Save the tuned model, its hyperparameters, test metrics, and SHAP importance data for downstream use.
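On the consuming side, the exported `feature_names.json` lets the backend validate request payloads and order values to match the training columns before scaling and prediction. A hedged sketch of that step (the helper name and payload shape are assumptions, not the project's actual API code):

```python
import json
import tempfile
from pathlib import Path

import numpy as np

def build_model_input(payload: dict, feature_names_path: Path) -> np.ndarray:
    """Validate an API payload and order its values to match training features."""
    feature_names = json.loads(feature_names_path.read_text())
    missing = [f for f in feature_names if f not in payload]
    if missing:
        raise ValueError(f"Missing features: {missing}")
    # single-row matrix in the exact column order the scaler/model expect
    return np.array([[float(payload[f]) for f in feature_names]])

# demo against a stand-in feature_names.json (the real file is exported below)
tmp = Path(tempfile.mkdtemp()) / "feature_names.json"
tmp.write_text(json.dumps(["energy_100g", "fat_100g", "additives_n"]))
row = build_model_input({"fat_100g": 3.13, "additives_n": 6, "energy_100g": 1137.9}, tmp)
print(row)
```

The resulting row would then be passed through the exported scaler and model in the same order used during training.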

In [22]:
# ensure fresh artifacts every run, removes stale artifacts
for d in [MODEL_TUNING_ARTIFACTS_DIRECTORY, API_MODEL_RESULTS_DIRECTORY]:
    if d.exists():
        shutil.rmtree(d)
    d.mkdir(parents=True, exist_ok=True)

print("Exporting artifacts...")

# core model for both
model_filename = "xgb_tuned.json"

# save for tuning artifacts
model_path = MODEL_TUNING_ARTIFACTS_DIRECTORY / model_filename
xgb_tuned.save_model(str(model_path))
print(f"Saved: {model_path}")

# save for backend/API
backend_model_path = API_MODEL_RESULTS_DIRECTORY / model_filename
xgb_tuned.save_model(str(backend_model_path))
print(f"Saved: {backend_model_path}")

# scaler for gui/api
scaler_path = API_MODEL_RESULTS_DIRECTORY / "final_scaler.joblib"
joblib.dump(scaler, scaler_path)
print(f"Saved: {scaler_path}")

# feature names, for api input validation
features_path = API_MODEL_RESULTS_DIRECTORY / "feature_names.json"
with open(features_path, "w") as f:
    json.dump(list(feature_names), f, indent=2)
print(f"Saved: {features_path}")

# hyperparameters
hp_path = MODEL_TUNING_ARTIFACTS_DIRECTORY / "tuned_hyperparameters.json"
with open(hp_path, "w") as f:
    json.dump(tuning_result.best_params, f, indent=2)
print(f"Saved: {hp_path}")

# test metrics + metadata
test_meta = {
    "model": "XGBoost (Tuned)",
    "split": "test",
    "n_trials": tuning_result.n_trials,
    "baseline_macro_f1": round(tuning_result.baseline_score, 4),
    "tuned_val_macro_f1": round(tuned_result.metrics["Macro F1"], 4),
    "test_metrics": {k: round(float(v), 4) for k, v in test_metrics.items()},
    "features": {
        "total_count": len(feature_names),
        "original_features": fe_meta["original_features"],
        "engineered_features": fe_meta["engineered_features"],
        "removed_features": fe_meta["features_removed_in_eda"],
        "all_features": list(feature_names),
    },
}

test_meta_path = MODEL_TUNING_ARTIFACTS_DIRECTORY / "test_evaluation_meta.json"
with open(test_meta_path, "w") as f:
    json.dump(test_meta, f, indent=2)
print(f"Saved: {test_meta_path}")

# per-class metrics
test_per_class_path = MODEL_TUNING_ARTIFACTS_DIRECTORY / "test_per_class_metrics.csv"
test_per_class.to_csv(test_per_class_path, index=False)
print(f"Saved: {test_per_class_path}")

# SHAP importance for the GUI
shap_path = MODEL_TUNING_ARTIFACTS_DIRECTORY / "shap_feature_importance.csv"
shap_pivot.to_csv(shap_path)
print(f"Saved: {shap_path}")

print("\nAll artifacts exported successfully for GUI + backend")
Exporting artifacts...
Saved: results\tuning\xgb_tuned.json
Saved: backend\models\xgb_tuned.json
Saved: backend\models\final_scaler.joblib
Saved: backend\models\feature_names.json
Saved: results\tuning\tuned_hyperparameters.json
Saved: results\tuning\test_evaluation_meta.json
Saved: results\tuning\test_per_class_metrics.csv
Saved: results\tuning\shap_feature_importance.csv

All artifacts exported successfully for GUI + backend

AI Use Disclosure¶

AI assistance tools were used in the following capacities during the development of this project:

  • Research and Planning: AI tools were used to search for code snippets, explore modeling approaches, and identify applicable machine learning techniques (e.g., NOVA classification strategies, anomaly detection methods, SHAP explainability patterns). These suggestions were reviewed, adapted, and validated by the team before implementation.

  • Copy Editing and Report Refinement: An AI assistant was used to copy edit written documentation and the final report draft, check for redundancy, and provide feedback on areas that could be tightened up or that required additional clarification. The prompt provided to the tool included context about the project purpose, target audience (academic evaluators for the AAI-590 Capstone), and formatting guidelines.

All AI-generated suggestions were critically reviewed by the team. Final decisions regarding methodology, implementation, and written content remain the work of the authors.