# ------------------ GLOBAL SETTINGS -----------------
# LOAD PACKAGES
require(ggplot2)
require(RColorBrewer)
require(gridExtra)
require(xtable)
require(huge)


# Check whether the user is already in the 'results' folder; otherwise change into it.
# This only works if the user is at least in the parent folder.
# if (length(grep(x=getwd(), pattern="results")) == 0) {
#     setwd("results")
# }

# Read the stored results object and take its `data` element as the working table
res.raw <- readRDS("results/results_2022-21-01-22:38:03")
res <- res.raw$data

# Turn the grouping columns into factors
for (grouping_col in c("pkg", "nw.type", "classifier", "nodes", "edges")) {
    res[[grouping_col]] <- as.factor(res[[grouping_col]])
}

# Data frame for pooled results (long format): the rows of `res` are stacked
# once per quality metric. `scale` holds the metric value, `name` the metric
# label, and the grouping columns are repeated for every metric.
# Names are the labels stored in `name`, values are the matching columns of
# `res`; the order determines the level order of the `name` factor.
metric_columns <- c("AUC" = "AUC", "PR-AUC" = "PR.AUC", "BCR" = "BCR", "norm-MCC" = "norm_MCC")
pooled <- do.call(rbind, lapply(names(metric_columns), function(metric_label) {
    data.frame(
        pkg = res$pkg,
        scale = res[[metric_columns[[metric_label]]]],
        name = as.factor(rep(metric_label, nrow(res))),
        nw.type = res$nw.type,
        # Spelled out in full: `res$node` only reached this column through
        # partial matching of `$`, which fails silently on tibbles.
        nodes = res$nodes,
        classifier = res$classifier,
        edges = res$edges
    )
}))

# Set the default ggplot2 theme for the session
theme_set(theme_bw(base_family = 'Helvetica', base_size = 16))

# Colour palette for the network types: entries 1-3 and 7 of the 8-colour "Set2" palette
set2_colours <- brewer.pal(n = 8, "Set2")
palette_nw <- set2_colours[c(1, 2, 3, 7)]

# Custom two-colour palette: the fifth shade of "Blues" followed by the fifth shade of "Reds"
blue_shade <- brewer.pal(n = 8, "Blues")[5]
red_shade <- brewer.pal(n = 9, "Reds")[5]
pl <- c(blue_shade, red_shade)

# Build one panel of Figure 7: box plots of the pooled metric values with
# whisker caps, a dashed reference line at y = 0.5 and one facet per metric.
#   mapping - aes() mapping of the panel (x variable, y = scale, fill = pkg)
#   tag     - panel tag, rendered in bold
#   ...     - further theme() settings, applied together with the legend position
metric_boxplot <- function(mapping, tag, ...) {
    ggplot(data = pooled, mapping) +
        stat_boxplot(geom = "errorbar", position = position_dodge(width = 0.75), width = 0.4) +
        geom_boxplot(outlier.shape = 4, outlier.alpha = 0.8) +
        geom_abline(intercept = 0.5, slope = 0, colour = "darkorange2", size = .5, linetype = "longdash") +
        scale_fill_manual(values = pl) +
        theme_bw(base_size = 16) +
        theme(legend.position = "bottom", ...) +
        labs(tag = bquote(~bold(.(tag)))) +
        facet_wrap(facets = ~ name, ncol = 4)
}

# Panel A: metrics by number of nodes
numNodes <- metric_boxplot(aes(x = nodes, y = scale, fill = pkg), tag = "A")

# Panel B: metrics by network type
resNW <- metric_boxplot(aes(x = nw.type, y = scale, fill = pkg), tag = "B")

# Panel C: metrics by classifier, with rotated x axis labels
resClass <- metric_boxplot(
    aes(x = classifier, y = scale, fill = pkg),
    tag = "C",
    axis.text.x = element_text(angle = -25, vjust = 0, hjust = 0.5)
)

# Stack the three panels in rows (each panel spans both layout columns) and save the figure
panel_layout <- matrix(c(1, 2, 3), nrow = 3, ncol = 2)
results <- grid.arrange(numNodes, resNW, resClass, layout_matrix = panel_layout)
ggsave("../figure/Figure7.pdf", plot = results, width = 17, height = 15)

# Get number of samples for each network topology
# 40 samples per combination of topology, network size and package:
# 40 * 4 sizes = 160 rows per topology, 160 * 4 topologies = 640 rows per package.
samples_per_setting <- 40
topologies <- c("random", "scale-free", "hub", "cluster")
network_sizes <- c(20, 40, 80, 400)
packages <- c("huge", "mcgraph")
rows_per_topology <- samples_per_setting * length(network_sizes)
rows_per_pkg <- rows_per_topology * length(topologies)
smpl <- data.frame(
    # One block of rows per topology, repeated for every package
    nw.type = rep(rep(topologies, each = rows_per_topology), times = length(packages)),
    # Sizes cycle row by row, so every topology block sees each size equally often
    nodes = as.factor(rep(network_sizes, length.out = rows_per_pkg * length(packages))),
    pkg = rep(packages, each = rows_per_pkg)
)

# Figure 5: count plot of the simulated settings (network size vs. network
# type), one facet per package; point size encodes the number of samples.
settingArr <- ggplot(smpl, aes(x = nodes, y = nw.type, colour = nw.type, fill = nw.type)) +
    stat_sum(show.legend = TRUE, alpha = 1) +
    facet_wrap(~ pkg, ncol = 4) +
    scale_colour_manual(values = palette_nw) +
    scale_fill_manual(values = palette_nw) +
    # Only the size legend is shown, titled "count"
    guides(size = guide_legend("count"), colour = "none", fill = "none") +
    theme_bw(base_family = 'Helvetica', base_size = 16) +
    theme(legend.position = "right")

ggsave("../figure/Figure5.pdf", plot = settingArr, width = 7, height = 5)

# if (length(grep(x=getwd(), pattern="results")) == 1) {
#     setwd("..")
# }

# Fig. 8C, FINAL RESULTS TABLE
# t-Statistics: two-sample t-test of each metric between the two `pkg` groups
# (t.test() defaults, i.e. Welch test with a 95% confidence interval)
auc.t <- t.test(AUC ~ pkg, res)
pr.auc.t <- t.test(PR.AUC ~ pkg, res)
bcr.t <- t.test(BCR ~ pkg, res)
norm_mcc.t <- t.test(norm_MCC ~ pkg, res)
# Tables
# p-values below `p_floor` are reported as 0, larger ones are kept unchanged.
# The `else` branch is required: an `if` without it evaluates to NULL, which
# makes format() return character(0) and data.frame() fail on unequal lengths.
p_floor <- 0.0001
report_p <- function(p) if (p < p_floor) 0 else p
auc.p <- report_p(auc.t$p.value)
pr.auc.p <- report_p(pr.auc.t$p.value)
bcr.p <- report_p(bcr.t$p.value)
norm_mcc.p <- report_p(norm_mcc.t$p.value)

# Row order of the table: AUC, PR-AUC, BCR, norm-MCC
t_tests <- list(auc.t, pr.auc.t, bcr.t, norm_mcc.t)
reported_p <- c(auc.p, pr.auc.p, bcr.p, norm_mcc.p)
# Pull one number out of every test result
per_test <- function(extract) vapply(t_tests, extract, numeric(1))

res_tab <- data.frame(
  # Group means; assumes the first level of `pkg` is "huge" and the second
  # "mcgraph" - TODO confirm against the data
  huge=round(per_test(function(tt) tt$estimate[[1]]), 3),
  mcgraph=round(per_test(function(tt) tt$estimate[[2]]), 3),
  # First group mean minus second group mean
  delta=round(per_test(function(tt) -diff(tt$estimate)), 3),
  stderr=round(per_test(function(tt) tt$stderr), 3),
  CI95_lb=round(per_test(function(tt) tt$conf.int[[1]]), 3),
  CI95_ub=round(per_test(function(tt) tt$conf.int[[2]]), 3),
  tvalue=round(per_test(function(tt) tt$statistic), 3),
  # Each p-value is formatted on its own so one value cannot widen the others
  pvalue=vapply(reported_p, format, character(1), nsmall=3, digits=3),
  # "***" marks p < p_floor; no other significance level is defined here
  siglevel=ifelse(per_test(function(tt) tt$p.value) < p_floor, "***", ""),
  df=per_test(function(tt) tt$parameter))
# caption <- c("\\label{tab:ttesttable}\\textbf{Comparison of the overall network reconstruction quality between \\emph{huge} and \\emph{mcgraph}.}
# Network reconstructions were evaluated based on the means of the AUC, the PR-AUC, the BCR and the normalized MCC for
# \\emph{huge} and \\emph{mcgraph}, assumed as class 1 and class 2, respectively. For each metrics a unpaired
# Welch two sample \\emph{t}-test for a significance value of $\\alpha = 0.05$ is done.
# Next to the mean values for the packages and the standard error SE, the difference of the means $\\Delta\\bar{x}$ is given.
# The p-values are shown  with their level of significance, as well as the degrees of freedom df.
# $p$-values $< 0.0001$ have a significance level of ***.")
# rownames(res_tab) <- c("AUC", "PR-AUC", "BCR", "norm-MCC")
# colnames(res_tab) <- c("$\\bar{x}_{\\mathrm{huge}}$", "$\\bar{x}_{\\mathrm{mcgraph}}$", "$\\Delta \\bar{x}$", "SE", "$\\mathrm{CI}95_{\\mathrm{lb}}$", "$\\mathrm{CI95}_{\\mathrm{ub}}$", "$t$-value", "$p$-value", "sig-level", "df")
# # Print table
# x.tab <- xtable(res_tab, align="lccccrccc", digits=3, caption=caption)
# label(x.tab) <- c("t_test_table")
# print(x.tab,
#     include.rownames=TRUE,
#     include.colnames=TRUE,
#     xtable.hline.after=c(-1,0,nrow(res_tab)))