Sliding window benchmarking
Below is a comparison of four machine learning models (LDA, SVC, kNN and ANN) on the five supported datasets with a subject-independent approach (each model is tested on subjects unseen during training), using a 2-second sliding window on the epochs to split the data into more examples [1].
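To make the sliding-window step concrete, here is a minimal standalone sketch (separate from the benchmark script below): slicing each epoch into 2-second windows turns one example into several. The sampling rate and the non-overlapping slicing scheme are illustrative assumptions, not taken from benchnirs.

import numpy as np

# Illustration only: hypothetical sampling rate and epoch
sfreq = 10                        # samples per second (assumed)
epoch = np.random.randn(4, 100)   # one epoch: 4 channels x 10 s
win = int(2 * sfreq)              # 2-second window length in samples

# Non-overlapping 2 s slices of the epoch (assumed slicing scheme)
windows = [epoch[:, i:i + win]
           for i in range(0, epoch.shape[1] - win + 1, win)]
print(len(windows))               # -> 5 examples from a single epoch

The full benchmark script follows.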
import datetime
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
import torch
from scipy import stats
from benchnirs.load import load_dataset
from benchnirs.process import process_epochs, extract_features
from benchnirs.learn import machine_learn, deep_learn
ALL_DATA_PATH = '../../data/dataset_' # path to the datasets
DATASETS = {'herff_2014_nb': ['1-back', '2-back', '3-back'],
            'shin_2018_nb': ['0-back', '2-back', '3-back'],
            'shin_2018_wg': ['baseline', 'word generation'],
            'shin_2016_ma': ['baseline', 'mental arithmetic'],
            'bak_2019_me': ['right', 'left', 'foot']}
CONFIDENCE = 0.05  # statistical significance threshold (95 % confidence)
start_time = datetime.datetime.now()
date = start_time.strftime('%Y_%m_%d_%H%M')
out_folder = f'./results/sliding_window_{date}'
os.makedirs(out_folder)
print(f'Main output folder: {out_folder}/')
print(f'Number of GPUs: {torch.cuda.device_count()}')
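# Initialise the output files: a Markdown accuracy table and a CSV of
# per-fold results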
with open(f'{out_folder}/summary.md', 'w') as w:
    w.write('# Accuracy table\n\n(Standard deviation on the cross-validation)')
    w.write('\n\n|Dataset|Chance level|')
    w.write('LDA (sd)|SVC (sd)|kNN (sd)|ANN (sd)|\n')
    w.write('|:---:|:---:|:---:|:---:|:---:|:---:|\n')

with open(f'{out_folder}/results.csv', 'w') as w:
    w.write('dataset;model;fold;accuracy;hyperparameters\n')
dict_accuracies = {}
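# Benchmark each dataset: load and preprocess the epochs, slice them with
# the sliding window, extract features and evaluate the four models with
# subject-independent cross-validation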
for dataset in DATASETS.keys():
    print(f'=====\n{dataset}\n=====')
    data_path = f'{ALL_DATA_PATH}{dataset[:-3]}/'
    out_path = f'{out_folder}/{dataset}_'

    # Load and preprocess data
    epochs = load_dataset(dataset, data_path, bandpass=[0.01, 0.5],
                          baseline=(-2, 0), roi_sides=True, tddr=True)
    classes = DATASETS[dataset]
    epochs_lab = epochs[classes]
    # Run models
    # (tslide=2 slices each epoch up to tmax into 2-second windows)
    nirs, labels, groups = process_epochs(epochs_lab, tmax=9.9, tslide=2)
    nirs_features = extract_features(nirs, ['mean', 'std', 'slope'])
    lda, hps_lda, _ = machine_learn(
        'lda', nirs_features, labels, groups, output_folder=f'{out_path}lda')
    svc, hps_svc, _ = machine_learn(
        'svc', nirs_features, labels, groups, output_folder=f'{out_path}svc')
    knn, hps_knn, _ = machine_learn(
        'knn', nirs_features, labels, groups, output_folder=f'{out_path}knn')
    ann, hps_ann, _ = deep_learn(
        'ann', nirs_features, labels, groups, output_folder=f'{out_path}ann')
    # Write results
    results = {'LDA': [lda, hps_lda], 'SVC': [svc, hps_svc],
               'kNN': [knn, hps_knn], 'ANN': [ann, hps_ann]}
    chance_level = np.around(1/len(classes), decimals=3)
    w_summary = open(f'{out_folder}/summary.md', 'a')
    w_results = open(f'{out_folder}/results.csv', 'a')
    w_summary.write(f'|{dataset}|{chance_level}|')
    for model in results.keys():
        w_summary.write(
            f'{np.around(np.mean(results[model][0]), decimals=3)} '
            f'({np.around(np.std(results[model][0]), decimals=3)})|')
        for fold, accuracy in enumerate(results[model][0]):
            hps = results[model][1][fold]
            w_results.write(f'{dataset};{model};{fold+1};{accuracy};"{hps}"\n')
    w_summary.write('\n')
    w_summary.close()
    w_results.close()
    dict_accuracies[dataset] = np.concatenate((lda, svc, knn, ann))

dict_accuracies['Model'] = list(np.repeat(list(results.keys()), len(lda)))
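# Gather the cross-validation accuracies in long format for plotting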
df_accuracies = pd.DataFrame(dict_accuracies)
df_accuracies = df_accuracies.melt(
    id_vars=['Model'], value_vars=list(DATASETS.keys()),
    var_name='Dataset', value_name='Accuracy')
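# Bar plot of the accuracies per dataset and model, with chance levels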
plt.figure(figsize=(12, 6))
sns.barplot(data=df_accuracies, y='Accuracy', x='Dataset', hue='Model',
            capsize=.1, palette='colorblind')
plt.axhline(1/3, color='red', linestyle='--', label='3-class chance level')
plt.axhline(1/2, color='blue', linestyle=':', label='2-class chance level')
plt.legend(bbox_to_anchor=(1.01, 0.5), loc=6)
plt.savefig(f'{out_folder}/summary.png', bbox_inches='tight', dpi=1200)
plt.close()
# Stats
print('Stats...')
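# For each dataset, compare each model to chance level (one-sample t-test
# when Shapiro-Wilk does not reject normality, Wilcoxon signed-rank test
# otherwise), then compare the models to each other (one-way ANOVA when
# normality and homoscedasticity hold, Kruskal-Wallis test otherwise)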
with open(f'{out_folder}/stats.md', 'w') as w:
    df = pd.read_csv(f'{out_folder}/results.csv', delimiter=';')
    w.write('## Comparison of model accuracies to chance level\n\n')
    w.write('|Dataset|Model|Shapiro p-value|Test|Statistic|p-value|\n')
    w.write('|:---:|:---:|:---:|:---:|:---:|:---:|\n')
    anova_table = ''
    for dataset in DATASETS.keys():
        dataset_accuracies = []
        chance_level = 1 / len(DATASETS[dataset])
        normality = True
        for model in results.keys():
            w.write(f'|{dataset}|{model}|')
            sub_df = df[(df['dataset'] == dataset) & (df['model'] == model)]
            accuracies = sub_df['accuracy'].to_numpy()
            dataset_accuracies.append(accuracies)
            # Check normality of the distribution
            _, p_shap = stats.shapiro(accuracies)
            w.write(f'{p_shap}|')
            if p_shap > CONFIDENCE:
                # t-test
                s_tt, p_tt = stats.ttest_1samp(accuracies, chance_level,
                                               alternative='greater')
                w.write(f't-test|{s_tt}|{p_tt}|\n')
            else:
                normality = False
                # Wilcoxon
                s_wilcox, p_wilcox = stats.wilcoxon(accuracies - chance_level,
                                                    alternative='greater')
                w.write(f'Wilcoxon|{s_wilcox}|{p_wilcox}|\n')
        # Check homoscedasticity across models
        _, p_bart = stats.bartlett(*dataset_accuracies)
        if normality and (p_bart > CONFIDENCE):
            s_anova, p_anova = stats.f_oneway(*dataset_accuracies)
            anova_table += f'|{dataset}|{p_bart}|ANOVA|{s_anova}|{p_anova}|\n'
        else:
            s_kru, p_kru = stats.kruskal(*dataset_accuracies)
            anova_table += f'|{dataset}|{p_bart}|Kruskal|{s_kru}|{p_kru}|\n'

    w.write('\n\n## Comparison of model accuracies to each other\n\n')
    w.write('|Dataset|Bartlett p-value|Test|Statistic|p-value|\n')
    w.write(f'|:---:|:---:|:---:|:---:|:---:|\n{anova_table}')
end_time = datetime.datetime.now()
elapsed_time = end_time - start_time
print(f'===\nElapsed time: {elapsed_time}')
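Provided the five datasets have been downloaded locally so that ALL_DATA_PATH points to them, the script can be run as-is: it creates a timestamped results folder containing the accuracy table (summary.md), the per-fold results (results.csv), the statistical tests (stats.md) and the summary figure (summary.png), along with the per-model output folders.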
References